LLM classifier and other updates

This commit is contained in:
Nicola Leonardi 2026-01-20 15:34:23 +01:00
parent 8f0ecc38c8
commit a3e10f6eda
5 changed files with 3590 additions and 335 deletions

View File

@ -10,4 +10,5 @@
- [analisi_esercitazione_12_2025_build_full_dataset](analisi_esercitazione_12_2025_build_full_dataset) rerun all the features building using pandas apply
- [analisi_esercitazione_12_2025_clip](analisi_esercitazione_12_2025_clip) run CLIP score calculation between image and alt-text
- [analisi_esercitazione_12_2025_inter_user_agreement](analisi_esercitazione_12_2025_inter_user_agreement) calculate inter-user agreements and inter LLM runs agreements
- [analisi_esercitazione_12_2025_distributions_comparison](analisi_esercitazione_12_2025_distributions_comparison) perform some indicator calculations to compare two candidate distributions with a reference one
- [analisi_esercitazione_12_2025_distributions_comparison](analisi_esercitazione_12_2025_distributions_comparison) perform some indicator calculations to compare two candidate distributions with a reference one
- [analisi_esercitazione_12_2025_classificatore_LLM](analisi_esercitazione_12_2025_classificatore_LLM) Evaluate LLM classifier performance on the original alt-text assessment (0-1 classification problem)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -502,6 +502,14 @@
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e298364b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
@ -1151,7 +1159,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "13cc0c39",
"metadata": {},
"outputs": [
@ -1161,7 +1169,7 @@
"np.float64(0.9155629139072847)"
]
},
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -1172,7 +1180,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "ab2b4074",
"metadata": {},
"outputs": [
@ -1329,7 +1337,7 @@
"[604 rows x 4 columns]"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -1349,7 +1357,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "c7d0c991",
"metadata": {},
"outputs": [
@ -1397,7 +1405,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 38,
"id": "67b8d24f",
"metadata": {},
"outputs": [
@ -1675,234 +1683,234 @@
"</div>"
],
"text/plain": [
"user {\"username\": \"Chiara Giordano\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp 1.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp 1.0 \n",
"user {\"username\": \"Chiara Giordano\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... 1.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... 1.0 \n",
"\n",
"user {\"username\": \"Elia Grassini\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp 1.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp 1.0 \n",
"user {\"username\": \"Elia Grassini\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... 1.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... 1.0 \n",
"\n",
"user {\"username\": \"Enrica Di Rado\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp 2.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"Enrica Di Rado\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... 2.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"Galesi\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp 4.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp 3.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"Galesi\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... 4.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... 3.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"Giorgia\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp 3.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"Giorgia\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... 3.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"Sara Pagliarecci\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"Sara Pagliarecci\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"a.caleo5\"} \\\n",
"user {\"username\": \"a.caleo5\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"e.covitti\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"ginevravassallo\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"gioelepasquini\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 3.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"l.novelli2@studenti.unipi.it\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"e.covitti\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"l.pecorella\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"ginevravassallo\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... 1.0 \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"lauracorti\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"gioelepasquini\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... 3.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... 1.0 \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"m.natale8\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"l.novelli2@studenti.unipi.it\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"r.dipiazza\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... 4.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"user {\"username\": \"l.pecorella\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... 2.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... 1.0 \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"\n",
"user {\"username\": \"lauracorti\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"\n",
"user {\"username\": \"m.natale8\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"\n",
"user {\"username\": \"r.dipiazza\"} \\\n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp 4.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"\n",
"user {\"username\": \"whitewolf\"} \n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8317909-product_... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493046-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8493310-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8501932-product_... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decathlon/8510030-product_... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(8).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500(9).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l500.webp 4.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(2).webp NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l960(3).webp NaN \n",
"user {\"username\": \"whitewolf\"} \n",
"image_url \n",
"https://giove.isti.cnr.it/users/leonardi/decath... 1.0 \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"https://giove.isti.cnr.it/users/leonardi/decath... NaN \n",
"... ... \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l5... 4.0 \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"https://giove.isti.cnr.it/users/manca/eBay/s-l9... NaN \n",
"\n",
"[157 rows x 16 columns]"
]
},
"execution_count": 11,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@ -1913,7 +1921,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 39,
"id": "89bf2af1",
"metadata": {},
"outputs": [
@ -2590,7 +2598,7 @@
"{\"username\": \"whitewolf\"} 1.000000 "
]
},
"execution_count": 12,
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
@ -2609,7 +2617,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 10,
"id": "f9dc50a4",
"metadata": {},
"outputs": [
@ -2802,7 +2810,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "54716d09",
"metadata": {},
"outputs": [
@ -2810,9 +2818,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Mean inter-user correlation (Pearson): 0.755\n",
"Mean inter-user correlation (Spearman): 0.655\n",
"Mean inter-user correlation (Kendall): 0.603\n"
"Mean inter-LLM round correlation (Pearson): 0.755\n",
"Mean inter-LLM round correlation (Spearman): 0.655\n",
"Mean inter-LLM round correlation (Kendall): 0.603\n"
]
}
],
@ -2820,8 +2828,8 @@
"# Pivot to get llm assessments as columns\n",
"pivot_df = df.pivot_table(\n",
" index='image_url', \n",
" columns='user', \n",
" values='llm_assessment'\n",
" columns='user', #each user triggered a separate LLM call for the same image\n",
" values='llm_assessment'#inter-run consistency of the LLM \n",
")\n",
"\n",
"# Calculate pairwise correlations between the LLM runs (one run per user)\n",
@ -3101,6 +3109,374 @@
"outliers = df[(df['Diff'] == 4) | (df['Diff'] == -4)]\n",
"print(outliers['image_url'])"
]
},
{
"cell_type": "markdown",
"id": "4d322d5c",
"metadata": {},
"source": [
"# Calcolo della kappa di Cohen (misura più standard per l'inter-rater agreement)"
]
},
{
"cell_type": "markdown",
"id": "05306b11",
"metadata": {},
"source": [
"The weighting options are:\n",
"\n",
"- `weights='linear'` - the penalty grows linearly with the distance between ratings (3 vs 4 -> 1, 3 vs 5 -> 2)\n",
"- `weights='quadratic'` - the penalty grows with the squared distance between ratings (3 vs 4 -> 1, 3 vs 5 -> 4); this is the standard choice for ordinal scales\n",
"- no weights (default) - all disagreements count equally, as for nominal data"
]
},
{
"cell_type": "markdown",
"id": "29cbd379",
"metadata": {},
"source": [
"### user-LLM agreement assessment"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "070f07f8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.2662980727659293, 0.551554654116535)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import cohen_kappa_score\n",
"\n",
"# Integer ratings of the two raters being compared (human vs LLM), one pair per row of df\n",
"user_scores = df['user_assessment'].astype(int)\n",
"llm_scores = df['llm_assessment'].astype(int)\n",
"\n",
"# Every integer rating between the lowest and highest observed value.\n",
"# It is passed explicitly because sklearn derives the linear/quadratic weights from\n",
"# label positions: if a rating value never occurs, the inferred label list skips it and\n",
"# the distance between its neighbours is under-weighted.\n",
"rating_labels = list(range(\n",
"    min(user_scores.min(), llm_scores.min()),\n",
"    max(user_scores.max(), llm_scores.max()) + 1\n",
"))\n",
"\n",
"# Cohen's kappa (unweighted): every disagreement counts the same\n",
"kappa = cohen_kappa_score(user_scores, llm_scores, labels=rating_labels)\n",
"\n",
"# Weighted Cohen's kappa (recommended for ordinal 1-5 scale)\n",
"weighted_kappa = cohen_kappa_score(user_scores, llm_scores, labels=rating_labels, weights='quadratic')\n",
"kappa, weighted_kappa"
]
},
{
"cell_type": "markdown",
"id": "c50e6dd7",
"metadata": {},
"source": [
"### inter-user agreement assessment"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "9092e866",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean inter-user correlation (Pearson): 0.480\n",
"Mean inter-user correlation (Spearman): 0.476\n",
"Mean inter-user correlation (Kendall): 0.418\n",
"Mean inter-user Cohen's Kappa: 0.128\n",
"Mean inter-user Weighted Cohen's Kappa: 0.314\n"
]
}
],
"source": [
"from itertools import combinations\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.metrics import cohen_kappa_score\n",
"\n",
"# One row per image, one column per user, each value being that user's rating.\n",
"# NOTE(review): pivot_table's default aggfunc is the mean, so duplicate (image, user) rows\n",
"# would be averaged and then truncated by astype(int) below - confirm there are none.\n",
"pivot_df = df.pivot_table(\n",
"    index='image_url',\n",
"    columns='user',\n",
"    values='user_assessment'\n",
")\n",
"\n",
"# Pairwise correlations between users; a pair needs at least 3 images in common\n",
"user_correlations = pivot_df.corr(min_periods=3)  # Pearson (default)\n",
"# Spearman rank correlation (monotonic relationships)\n",
"user_correlations_spearman = pivot_df.corr(method='spearman', min_periods=3)\n",
"# Kendall tau (rank agreement, better for small datasets/ties)\n",
"user_correlations_kendall = pivot_df.corr(method='kendall', min_periods=3)\n",
"\n",
"# Every integer rating between the lowest and highest observed value.\n",
"# It is passed explicitly to cohen_kappa_score because sklearn derives the quadratic\n",
"# weights from label positions: with labels inferred per pair, two users who only used\n",
"# e.g. {1, 3, 5} would have a 1-vs-3 disagreement weighted like an adjacent one.\n",
"observed = pivot_df.to_numpy(dtype=float).ravel()\n",
"observed = observed[~np.isnan(observed)]\n",
"rating_labels = np.arange(int(observed.min()), int(observed.max()) + 1) if observed.size else None\n",
"\n",
"# Pairwise Cohen's kappa between all users; NaN marks pairs that could not be scored\n",
"users = pivot_df.columns\n",
"n_users = len(users)\n",
"kappa_matrix = np.full((n_users, n_users), np.nan)\n",
"weighted_kappa_matrix = np.full((n_users, n_users), np.nan)\n",
"np.fill_diagonal(kappa_matrix, 1.0)  # perfect agreement with self\n",
"np.fill_diagonal(weighted_kappa_matrix, 1.0)\n",
"\n",
"for (i, user1), (j, user2) in combinations(enumerate(users), 2):  # i < j: upper triangle\n",
"    # Keep only the images rated by both users\n",
"    mask = pivot_df[[user1, user2]].notna().all(axis=1)\n",
"    if mask.sum() < 3:  # minimum 3 common images\n",
"        continue\n",
"    ratings1 = pivot_df.loc[mask, user1].values.astype(int)\n",
"    ratings2 = pivot_df.loc[mask, user2].values.astype(int)\n",
"    if np.unique(np.concatenate([ratings1, ratings2])).size < 2:\n",
"        # Both raters used one identical label: kappa is 0/0, so the pair stays NaN\n",
"        # (same result as before, without the sklearn warnings)\n",
"        continue\n",
"\n",
"    # Unweighted kappa, mirrored because the matrix is symmetric\n",
"    kappa_matrix[i, j] = kappa_matrix[j, i] = cohen_kappa_score(\n",
"        ratings1, ratings2, labels=rating_labels\n",
"    )\n",
"\n",
"    # Weighted kappa (quadratic weights for ordinal scale)\n",
"    weighted_kappa_matrix[i, j] = weighted_kappa_matrix[j, i] = cohen_kappa_score(\n",
"        ratings1, ratings2, labels=rating_labels, weights='quadratic'\n",
"    )\n",
"\n",
"# Convert to DataFrames for easier interpretation\n",
"kappa_df = pd.DataFrame(kappa_matrix, index=users, columns=users)\n",
"weighted_kappa_df = pd.DataFrame(weighted_kappa_matrix, index=users, columns=users)\n",
"\n",
"# Mean over distinct pairs: strict upper triangle (diagonal excluded), NaN pairs ignored\n",
"upper = np.triu(np.ones((n_users, n_users), dtype=bool), k=1)\n",
"mean_inter_user_corr = user_correlations.where(upper).stack().mean()\n",
"mean_inter_user_corr_spearman = user_correlations_spearman.where(upper).stack().mean()\n",
"mean_inter_user_corr_kendall = user_correlations_kendall.where(upper).stack().mean()\n",
"mean_inter_user_kappa = kappa_df.where(upper).stack().mean()\n",
"mean_inter_user_weighted_kappa = weighted_kappa_df.where(upper).stack().mean()\n",
"\n",
"print(f\"Mean inter-user correlation (Pearson): {mean_inter_user_corr:.3f}\")\n",
"print(f\"Mean inter-user correlation (Spearman): {mean_inter_user_corr_spearman:.3f}\")\n",
"print(f\"Mean inter-user correlation (Kendall): {mean_inter_user_corr_kendall:.3f}\")\n",
"print(f\"Mean inter-user Cohen's Kappa: {mean_inter_user_kappa:.3f}\")\n",
"print(f\"Mean inter-user Weighted Cohen's Kappa: {mean_inter_user_weighted_kappa:.3f}\")"
]
},
{
"cell_type": "markdown",
"id": "6afa11df",
"metadata": {},
"source": [
"### inter-LLM agreement assessment (inter-run consistency, i.e. the LLM's self-consistency)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "dfda8874",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:534: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
" warnings.warn(\n",
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\sklearn\\metrics\\_classification.py:897: RuntimeWarning: invalid value encountered in scalar divide\n",
" k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean inter-LLM correlation (Pearson): 0.755\n",
"Mean inter-LLM correlation (Spearman): 0.655\n",
"Mean inter-LLM correlation (Kendall): 0.603\n",
"Mean inter-LLM Cohen's Kappa: 0.320\n",
"Mean inter-LLM Weighted Cohen's Kappa: 0.649\n"
]
}
],
"source": [
"from itertools import combinations\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.metrics import cohen_kappa_score\n",
"\n",
"# One row per image, one column per LLM run, each value being the LLM rating of that run.\n",
"# NOTE(review): pivot_table's default aggfunc is the mean, so duplicate (image, user) rows\n",
"# would be averaged and then truncated by astype(int) below - confirm there are none.\n",
"pivot_df = df.pivot_table(\n",
"    index='image_url',\n",
"    columns='user',  # each user triggered a separate LLM call for the same image\n",
"    values='llm_assessment'  # inter-run consistency of the LLM\n",
")\n",
"\n",
"# Pairwise correlations between LLM runs; a pair needs at least 3 images in common.\n",
"# The user_* / *_inter_user_* names are kept from the inter-user analysis, but here every\n",
"# column holds the LLM ratings produced for one user.\n",
"user_correlations = pivot_df.corr(min_periods=3)  # Pearson (default)\n",
"# Spearman rank correlation (monotonic relationships)\n",
"user_correlations_spearman = pivot_df.corr(method='spearman', min_periods=3)\n",
"# Kendall tau (rank agreement, better for small datasets/ties)\n",
"user_correlations_kendall = pivot_df.corr(method='kendall', min_periods=3)\n",
"\n",
"# Every integer rating between the lowest and highest observed value.\n",
"# It is passed explicitly to cohen_kappa_score because sklearn derives the quadratic\n",
"# weights from label positions: with labels inferred per pair, two runs that only produced\n",
"# e.g. {1, 3, 5} would have a 1-vs-3 disagreement weighted like an adjacent one.\n",
"observed = pivot_df.to_numpy(dtype=float).ravel()\n",
"observed = observed[~np.isnan(observed)]\n",
"rating_labels = np.arange(int(observed.min()), int(observed.max()) + 1) if observed.size else None\n",
"\n",
"# Pairwise Cohen's kappa between all LLM runs; NaN marks pairs that could not be scored\n",
"users = pivot_df.columns\n",
"n_users = len(users)\n",
"kappa_matrix = np.full((n_users, n_users), np.nan)\n",
"weighted_kappa_matrix = np.full((n_users, n_users), np.nan)\n",
"np.fill_diagonal(kappa_matrix, 1.0)  # perfect agreement with self\n",
"np.fill_diagonal(weighted_kappa_matrix, 1.0)\n",
"\n",
"for (i, user1), (j, user2) in combinations(enumerate(users), 2):  # i < j: upper triangle\n",
"    # Keep only the images assessed in both runs\n",
"    mask = pivot_df[[user1, user2]].notna().all(axis=1)\n",
"    if mask.sum() < 3:  # minimum 3 common images\n",
"        continue\n",
"    ratings1 = pivot_df.loc[mask, user1].values.astype(int)\n",
"    ratings2 = pivot_df.loc[mask, user2].values.astype(int)\n",
"    if np.unique(np.concatenate([ratings1, ratings2])).size < 2:\n",
"        # Both runs produced one identical label: kappa is 0/0, so the pair stays NaN\n",
"        # (same result as before, without the sklearn warnings)\n",
"        continue\n",
"\n",
"    # Unweighted kappa, mirrored because the matrix is symmetric\n",
"    kappa_matrix[i, j] = kappa_matrix[j, i] = cohen_kappa_score(\n",
"        ratings1, ratings2, labels=rating_labels\n",
"    )\n",
"\n",
"    # Weighted kappa (quadratic weights for ordinal scale)\n",
"    weighted_kappa_matrix[i, j] = weighted_kappa_matrix[j, i] = cohen_kappa_score(\n",
"        ratings1, ratings2, labels=rating_labels, weights='quadratic'\n",
"    )\n",
"\n",
"# Convert to DataFrames for easier interpretation\n",
"kappa_df = pd.DataFrame(kappa_matrix, index=users, columns=users)\n",
"weighted_kappa_df = pd.DataFrame(weighted_kappa_matrix, index=users, columns=users)\n",
"\n",
"# Mean over distinct pairs of runs: strict upper triangle (diagonal excluded), NaN pairs ignored\n",
"upper = np.triu(np.ones((n_users, n_users), dtype=bool), k=1)\n",
"mean_inter_user_corr = user_correlations.where(upper).stack().mean()\n",
"mean_inter_user_corr_spearman = user_correlations_spearman.where(upper).stack().mean()\n",
"mean_inter_user_corr_kendall = user_correlations_kendall.where(upper).stack().mean()\n",
"mean_inter_user_kappa = kappa_df.where(upper).stack().mean()\n",
"mean_inter_user_weighted_kappa = weighted_kappa_df.where(upper).stack().mean()\n",
"\n",
"print(f\"Mean inter-LLM correlation (Pearson): {mean_inter_user_corr:.3f}\")\n",
"print(f\"Mean inter-LLM correlation (Spearman): {mean_inter_user_corr_spearman:.3f}\")\n",
"print(f\"Mean inter-LLM correlation (Kendall): {mean_inter_user_corr_kendall:.3f}\")\n",
"print(f\"Mean inter-LLM Cohen's Kappa: {mean_inter_user_kappa:.3f}\")\n",
"print(f\"Mean inter-LLM Weighted Cohen's Kappa: {mean_inter_user_weighted_kappa:.3f}\")"
]
},
{
"cell_type": "markdown",
"id": "98a5aa89",
"metadata": {},
"source": [
"I numeri confermano che l'inter-user agreement (0.314) < inter-LLM agreement (0.649) e che lo user-LLM agreement sta nel mezzo (0.552); i valori riportati sono le kappa di Cohen pesate (pesi quadratici)."
]
}
],
"metadata": {