diff --git a/scripts/esercitazione_12_2025/README.md b/scripts/esercitazione_12_2025/README.md index 21c5468..b089f06 100644 --- a/scripts/esercitazione_12_2025/README.md +++ b/scripts/esercitazione_12_2025/README.md @@ -10,4 +10,5 @@ - [analisi_esercitazione_12_2025_build_full_dataset](analisi_esercitazione_12_2025_build_full_dataset) rerun all the features building using pandas apply - [analisi_esercitazione_12_2025_clip](analisi_esercitazione_12_2025_clip) run CLIP score calculation between image and alt-text - [analisi_esercitazione_12_2025_inter_user_agreement](analisi_esercitazione_12_2025_inter_user_agreement) calculate inter-user agreements and inter LLM runs agreements -- [analisi_esercitazione_12_2025_distributions_comparison](analisi_esercitazione_12_2025_distributions_comparison) perform some indicator calculations to compare two candidates distrubutions with a reference one \ No newline at end of file +- [analisi_esercitazione_12_2025_distributions_comparison](analisi_esercitazione_12_2025_distributions_comparison) perform some indicator calculations to compare two candidates distrubutions with a reference one +- [analisi_esercitazione_12_2025_classificatore_LLM](analisi_esercitazione_12_2025_classificatore_LLM) Evaluate LLM classifier performance on the original alt-text assessment (0-1 classification problem) \ No newline at end of file diff --git a/scripts/esercitazione_12_2025/analisi_esercitazione_12_2025_build_full_dataset.ipynb b/scripts/esercitazione_12_2025/analisi_esercitazione_12_2025_build_full_dataset.ipynb index 533369b..820710d 100644 --- a/scripts/esercitazione_12_2025/analisi_esercitazione_12_2025_build_full_dataset.ipynb +++ b/scripts/esercitazione_12_2025/analisi_esercitazione_12_2025_build_full_dataset.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 1, "id": "a9927753", "metadata": {}, "outputs": [], @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "7bc18194", "metadata": {}, "outputs": [ @@ -514,7 +514,7 @@ "[494 rows x 19 columns]" ] }, - "execution_count": 56, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 20, "id": "51fc089c", "metadata": {}, "outputs": [], @@ -550,7 +550,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 21, "id": "35287f2f", "metadata": {}, "outputs": [], @@ -586,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 22, "id": "afe0b9d9", "metadata": {}, "outputs": [], @@ -600,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 23, "id": "edfc52c6", "metadata": {}, "outputs": [], @@ -611,7 +611,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 24, "id": "d23e53a7", "metadata": {}, "outputs": [], @@ -622,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 25, "id": "94c7fc52", "metadata": {}, "outputs": [], @@ -638,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 26, "id": "0cbd5df3", "metadata": {}, "outputs": [], @@ -655,7 +655,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 27, "id": "0604b77b", "metadata": {}, "outputs": [], @@ -665,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 28, "id": "4a730244", "metadata": {}, "outputs": [ @@ -1378,7 +1378,7 @@ "[17 rows x 24 columns]" ] }, - "execution_count": 66, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1390,7 +1390,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 29, "id": "d5590552", "metadata": {}, "outputs": [], @@ -1401,7 +1401,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 30, "id": "403f6dff", "metadata": {}, "outputs": [], @@ -1430,18 +1430,18 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 31, "id": "cde1b613", "metadata": {}, "outputs": [], "source": [ "# Apply the function with language based on english_site column (NB forse meglio farlo in base stima stessa della lingua. In entrambi i casi ci sono sbagli\n", - "#( in base english_site column sbaglio utenti che hanno sempre scritto in ita e LLM quando scrivono in inglese invece che in ita, in base stima lingua sbaglio se lo stimatore sbaglia. Forse questa ha meno errori)\n", + "#( in base english_site column sbaglio utenti che hanno \"quasi sempre\" scritto in ita e LLM quando scrivono in inglese invece che in ita, in base stima lingua sbaglio se lo stimatore sbaglia. Forse questa ha meno errori)\n", "df[['flesch_reading_ease', 'gunning_fog_index']] = df.apply(\n", " lambda row: extract_readability_indicators(\n", " row['llm_alt_text_ita'], #row['llm_alt_text'], \n", " #language='en' if row['english_site'] else 'it'\n", - " language='it' #if row['english_site'] else 'it' # so che testo in italiano\n", + " language='it' #if row['english_site'] else 'it' # so che testo in italiano (tradotto)\n", " ), \n", " axis=1\n", ")" @@ -1449,7 +1449,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": null, "id": "cd737634", "metadata": {}, "outputs": [ @@ -1466,11 +1466,41 @@ ] } ], + "source": [ + "#in reltà utenti ogni tanto hanno scritto in inglese se sito in inglese\n", + "df[['user_flesch_reading_ease', 'user_gunning_fog_index']] = df.apply(\n", + " lambda row: extract_readability_indicators(\n", + " row['user_alt_text'], \n", + " language='it' #if row['english_site'] else 'it' # gli utenti hanno quasi sempre scritto in italiano\n", + " ), \n", + " axis=1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "24ea6531", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sono if\n", + "sono if\n", + "sono if\n", + "sono if\n", + "sono if\n", + "sono if\n" + ] + } + ], "source": [ "df[['user_flesch_reading_ease', 'user_gunning_fog_index']] = df.apply(\n", " lambda row: extract_readability_indicators(\n", " row['user_alt_text'], \n", - " language='it' #if row['english_site'] else 'it' # gli utenti hanno sempre scritto in italiano\n", + " language='en' if row['user_alt_text_english'] else 'it' # uso la stima della lingua\n", " ), \n", " axis=1\n", ")" @@ -3203,7 +3233,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 33, "id": "dc37fcf7", "metadata": {}, "outputs": [ @@ -3258,8 +3288,8 @@ "
| \n", + " | User LLM gunning_fog_index similarities Pearson | \n", + "User LLM gunning_fog_index similarities Spearman | \n", + "User LLM gunning_fog_index similarities Kendall Tau | \n", + "
|---|---|---|---|
| Results | \n", + "0.499295 | \n", + "0.524163 | \n", + "0.363532 | \n", + "
| \n", + " | User-LLM Pearson | \n", + "User-LLM Spearman | \n", + "User-LLM Kendall Tau | \n", + "
|---|---|---|---|
| Results | \n", + "0.624645 | \n", + "0.593628 | \n", + "0.509181 | \n", + "
| \n", + " | lexical_similarity | \n", + "semantic_similarity | \n", + "bert_score_similarity | \n", + "
|---|---|---|---|
| count | \n", + "494.000000 | \n", + "494.000000 | \n", + "494.000000 | \n", + "
| mean | \n", + "0.381079 | \n", + "0.674069 | \n", + "0.694326 | \n", + "
| std | \n", + "0.238313 | \n", + "0.207774 | \n", + "0.153447 | \n", + "
| min | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| 25% | \n", + "0.194314 | \n", + "0.568537 | \n", + "0.622227 | \n", + "
| 50% | \n", + "0.348274 | \n", + "0.713125 | \n", + "0.711912 | \n", + "
| 75% | \n", + "0.545991 | \n", + "0.832455 | \n", + "0.790161 | \n", + "
| max | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "
| \n", + " | page_url | \n", + "user | \n", + "image_url | \n", + "original_alt_text | \n", + "user_alt_text | \n", + "llm_alt_text | \n", + "user_assessment | \n", + "llm_assessment | \n", + "user_llm_assessment | \n", + "llm_model | \n", + "html_context | \n", + "immediate_context | \n", + "nearby_context | \n", + "page_title | \n", + "page_description | \n", + "page_keywords | \n", + "llm_evaluation_result | \n", + "llm_judgment | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{\"username\": \"Galesi\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{*ultra-black-8542384*} | \n", + "Mutanda lunga aderente di colore nero (indossa... | \n", + "Simond Men's MT500 Merino Wool Boxer Briefs in... | \n", + "1 | \n", + "1 | \n", + "3 | \n", + "gpt-4o | \n", + "<span>: Vendor: <h3>: Simond Men's MT500 Merin... | \n", + "No immediate context found | \n", + "No nearby text found | \n", + "Men's Outdoor Apparel – Decathlon | \n", + "Shop our selection of outdoor clothes and gear... | \n", + "NaN | \n", + "The alt-text '*ultra-black-8542384*' is inadeq... | \n", + "failure | \n", + "
| 1 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{\"username\": \"Galesi\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "Forclaz MT500 Lightweight Packable Hiking Sandals | \n", + "Sandalo grigio con suola bassa (modello Forcla... | \n", + "Forclaz MT500 Lightweight Packable Hiking Sand... | \n", + "4 | \n", + "4 | \n", + "4 | \n", + "gpt-4o | \n", + "<span>: Save 33% <span>: Vendor: <h3>: Forclaz... | \n", + "No immediate context found | \n", + "<span> [154px]: Save 33% | \n", + "Men's Outdoor Apparel – Decathlon | \n", + "Shop our selection of outdoor clothes and gear... | \n", + "NaN | \n", + "The alt-text describes the product accurately ... | \n", + "success | \n", + "
| 2 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{\"username\": \"Galesi\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{*unspecified-8553119*} | \n", + "Scarpa da trekking di colore grigio scuro | \n", + "Quechua Men's MH100 Waterproof Mid Hiking Boot... | \n", + "1 | \n", + "1 | \n", + "3 | \n", + "gpt-4o | \n", + "<span>: Vendor: <h3>: Quechua Men's MH100 Wate... | \n", + "No immediate context found | \n", + "No nearby text found | \n", + "Men's Outdoor Apparel – Decathlon | \n", + "Shop our selection of outdoor clothes and gear... | \n", + "NaN | \n", + "The alt-text is not appropriate as it does not... | \n", + "failure | \n", + "
| 3 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{\"username\": \"Galesi\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{*carbon-gray-8572546*} | \n", + "pantaloni grigio scuri con cintura scura. Sono... | \n", + "Men's Travel 100 Cargo Pants in carbon gray by... | \n", + "1 | \n", + "1 | \n", + "3 | \n", + "gpt-4o | \n", + "<span>: Vendor: <h3>: Forclaz Men's Travel 100... | \n", + "No immediate context found | \n", + "No nearby text found | \n", + "Men's Outdoor Apparel – Decathlon | \n", + "Shop our selection of outdoor clothes and gear... | \n", + "NaN | \n", + "The original alt-text, '*carbon-gray-8572546*'... | \n", + "failure | \n", + "
| 4 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{\"username\": \"Galesi\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "{*laurel-green-8749613*} | \n", + "ragazzo in tuta sportiva, indossa felpa verde ... | \n", + "Quechua Men's MH120 green fleece hiking jacket. | \n", + "1 | \n", + "1 | \n", + "3 | \n", + "gpt-4o | \n", + "<span>: Vendor: <h3>: Quechua Men's MH120 Flee... | \n", + "No immediate context found | \n", + "<span> [184px]: $59.99 <span> [185px]: Was\\n ... | \n", + "Men's Outdoor Apparel – Decathlon | \n", + "Shop our selection of outdoor clothes and gear... | \n", + "NaN | \n", + "The original alt-text 'laurel-green-8749613' d... | \n", + "failure | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 489 | \n", + "https://giove.isti.cnr.it/users/leonardi/nike.... | \n", + "{\"username\": \"r.dipiazza\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/nike/... | \n", + "Nike Icon Pantaloni in tessuto da basket - Uomo | \n", + "Pantaloni in tessuto da basket Nike Icon per U... | \n", + "Nike Icon Pantaloni in tessuto da basket - Uom... | \n", + "4 | \n", + "4 | \n", + "4 | \n", + "gpt-4o | \n", + "<a>: Nike Icon | \n", + "No immediate context found | \n", + "<a> [93px]: Nike Icon | \n", + "Acquista Abbigliamento da Uomo. Nike IT | \n", + "Trova l'abbigliamento da uomo Nike per lo spor... | \n", + "Acquista Abbigliamento da Uomo | \n", + "The alt-text is appropriate as it clearly iden... | \n", + "success | \n", + "
| 490 | \n", + "https://giove.isti.cnr.it/users/leonardi/nike.... | \n", + "{\"username\": \"r.dipiazza\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/nike/... | \n", + "Kobe Pantaloni da basket Therma-FIT | \n", + "Pantaloni felpati da basket Therma-FIT Kobe, c... | \n", + "Kobe Pantaloni da basket Therma-FIT | \n", + "3 | \n", + "4 | \n", + "3 | \n", + "gpt-4o | \n", + "<a>: Kobe | \n", + "No immediate context found | \n", + "<a> [93px]: Kobe | \n", + "Acquista Abbigliamento da Uomo. Nike IT | \n", + "Trova l'abbigliamento da uomo Nike per lo spor... | \n", + "Acquista Abbigliamento da Uomo | \n", + "The alt-text is appropriate as it identifies t... | \n", + "success | \n", + "
| 491 | \n", + "https://giove.isti.cnr.it/users/leonardi/nike.... | \n", + "{\"username\": \"r.dipiazza\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/nike/... | \n", + "Nike Stride Giacca da running Repel UV – Uomo | \n", + "Giacca da running nera da uomo Nike Stride, Re... | \n", + "Nike Stride Repel UV running jacket for men av... | \n", + "3 | \n", + "4 | \n", + "2 | \n", + "gpt-4o | \n", + "<a>: Nike Stride | \n", + "No immediate context found | \n", + "<a> [110px]: Nike Stride <span> [163px]: +1 | \n", + "Acquista Abbigliamento da Uomo. Nike IT | \n", + "Trova l'abbigliamento da uomo Nike per lo spor... | \n", + "Acquista Abbigliamento da Uomo | \n", + "The alt-text describes the product effectively... | \n", + "success | \n", + "
| 492 | \n", + "https://giove.isti.cnr.it/users/leonardi/nike.... | \n", + "{\"username\": \"r.dipiazza\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/nike/... | \n", + "Nike Tech Pantaloni jogger in fleece – Uomo | \n", + "Pantaloni jogger in pile da uomo Nike Tech, bi... | \n", + "Nike Tech jogger pants in fleece for men, disp... | \n", + "2 | \n", + "4 | \n", + "5 | \n", + "gpt-4o | \n", + "<a>: Nike Tech | \n", + "No immediate context found | \n", + "<a> [93px]: Nike Tech | \n", + "Acquista Abbigliamento da Uomo. Nike IT | \n", + "Trova l'abbigliamento da uomo Nike per lo spor... | \n", + "Acquista Abbigliamento da Uomo | \n", + "The alt-text provides adequate information abo... | \n", + "success | \n", + "
| 493 | \n", + "https://giove.isti.cnr.it/users/leonardi/nike.... | \n", + "{\"username\": \"r.dipiazza\"} | \n", + "https://giove.isti.cnr.it/users/leonardi/nike/... | \n", + "Nike Windrunner Piumino - Uomo | \n", + "Piumino da uomo con cappuccio, Nike, colore nero | \n", + "Nike Windrunner jacket for men, black, with vi... | \n", + "2 | \n", + "4 | \n", + "4 | \n", + "gpt-4o | \n", + "<a>: Nike Windrunner | \n", + "No immediate context found | \n", + "<a> [93px]: Nike Windrunner | \n", + "Acquista Abbigliamento da Uomo. Nike IT | \n", + "Trova l'abbigliamento da uomo Nike per lo spor... | \n", + "Acquista Abbigliamento da Uomo | \n", + "The alt-text 'Nike Windrunner Piumino - Uomo' ... | \n", + "success | \n", + "
494 rows × 18 columns
\n", + "| \n", + " | page_url | \n", + "image_url | \n", + "mean_user_assessment | \n", + "mean_llm_assessment | \n", + "ground_truth_binary | \n", + "llm_prediction_binary | \n", + "num_assessments | \n", + "
|---|---|---|---|---|---|---|---|
| 0 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "1.250000 | \n", + "1.000000 | \n", + "0 | \n", + "0 | \n", + "4 | \n", + "
| 1 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "2.000000 | \n", + "2.000000 | \n", + "0 | \n", + "0 | \n", + "3 | \n", + "
| 2 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "1.666667 | \n", + "1.333333 | \n", + "0 | \n", + "0 | \n", + "3 | \n", + "
| 3 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "1.000000 | \n", + "1.000000 | \n", + "0 | \n", + "0 | \n", + "3 | \n", + "
| 4 | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "https://giove.isti.cnr.it/users/leonardi/decat... | \n", + "1.000000 | \n", + "1.000000 | \n", + "0 | \n", + "0 | \n", + "3 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 152 | \n", + "https://giove.isti.cnr.it/users/manca/eBay.html | \n", + "https://giove.isti.cnr.it/users/manca/eBay/s-l... | \n", + "4.000000 | \n", + "4.000000 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "
| 153 | \n", + "https://giove.isti.cnr.it/users/manca/eBay.html | \n", + "https://giove.isti.cnr.it/users/manca/eBay/s-l... | \n", + "3.000000 | \n", + "3.000000 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "
| 154 | \n", + "https://giove.isti.cnr.it/users/manca/eBay.html | \n", + "https://giove.isti.cnr.it/users/manca/eBay/s-l... | \n", + "3.250000 | \n", + "4.500000 | \n", + "1 | \n", + "1 | \n", + "4 | \n", + "
| 155 | \n", + "https://giove.isti.cnr.it/users/manca/eBay.html | \n", + "https://giove.isti.cnr.it/users/manca/eBay/s-l... | \n", + "1.000000 | \n", + "1.000000 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "
| 156 | \n", + "https://giove.isti.cnr.it/users/manca/eBay.html | \n", + "https://giove.isti.cnr.it/users/manca/eBay/s-l... | \n", + "1.000000 | \n", + "1.000000 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "
157 rows × 7 columns
\n", + "| \n", + " | Num Assessments | \n", + "Num Images | \n", + "Accuracy | \n", + "Agreement | \n", + "
|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "15 | \n", + "0.666667 | \n", + "10 | \n", + "
| 1 | \n", + "2 | \n", + "6 | \n", + "1.000000 | \n", + "6 | \n", + "
| 2 | \n", + "3 | \n", + "89 | \n", + "0.932584 | \n", + "83 | \n", + "
| 3 | \n", + "4 | \n", + "36 | \n", + "1.000000 | \n", + "36 | \n", + "
| 4 | \n", + "5 | \n", + "10 | \n", + "1.000000 | \n", + "10 | \n", + "
| 5 | \n", + "6 | \n", + "1 | \n", + "1.000000 | \n", + "1 | \n", + "
| \n", + " | clip_score_user | \n", + "clip_score_llm | \n", + "clip_score_llm_1 | \n", + "clip_score_llm_2 | \n", + "
|---|---|---|---|---|
| count | \n", + "494.000000 | \n", + "494.000000 | \n", + "494.000000 | \n", + "494.000000 | \n", + "
| mean | \n", + "24.719324 | \n", + "26.108775 | \n", + "24.920970 | \n", + "26.065949 | \n", + "
| std | \n", + "5.337241 | \n", + "4.125126 | \n", + "5.350998 | \n", + "4.387812 | \n", + "
| min | \n", + "0.000000 | \n", + "10.382000 | \n", + "5.359000 | \n", + "12.741000 | \n", + "
| 25% | \n", + "21.990000 | \n", + "23.717250 | \n", + "22.171000 | \n", + "23.234250 | \n", + "
| 50% | \n", + "25.608500 | \n", + "26.526000 | \n", + "25.914000 | \n", + "26.522500 | \n", + "
| 75% | \n", + "28.056000 | \n", + "28.642000 | \n", + "28.274750 | \n", + "29.103000 | \n", + "
| max | \n", + "38.108000 | \n", + "38.108000 | \n", + "39.109000 | \n", + "38.338000 | \n", + "