wcag_AI_validation/scripts/manage_mllm_response.ipynb

1433 lines
75 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "61ea49f8",
"metadata": {},
"source": [
"# 1) semantic, sparse, bertscore, lexical similarity between LLM on new_alt_text proposal "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d50c12ab",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\nicola\\anaconda3\\envs\\accessibility\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import torch\n",
"import numpy as np\n",
"from transformers import BertTokenizer, BertModel\n",
"\n",
"def cosine_similarity(a, b):\n",
" return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))\n",
"\n",
"\n",
"#funziona meglio che sentence_embeddings perchè il tokenizer skippa token speciali\n",
"def bert_score(reference, candidate, return_similarity_matrix=False):\n",
" \n",
" # Load the BERT tokenizer and model\n",
" tokenizer = BertTokenizer.from_pretrained(\"bert-base-uncased\")\n",
" model = BertModel.from_pretrained(\"bert-base-uncased\")\n",
" \n",
" # Tokenize the input text\n",
" ref_tokens = tokenizer(reference, return_tensors=\"pt\", add_special_tokens=False)#add_special_tokens=False mi evita codifica dei caratteri speciali\n",
" can_tokens = tokenizer(candidate, return_tensors=\"pt\", add_special_tokens=False)\n",
" print(\"ref_tokens:\",ref_tokens)\n",
" print(\"can_tokens:\",can_tokens)\n",
"\n",
" # Get the BERT embeddings\n",
" model.eval()\n",
" with torch.no_grad():\n",
" ref_outputs = model(**ref_tokens)\n",
" ref_embeddings = ref_outputs.last_hidden_state[0]\n",
"\n",
" can_outputs = model(**can_tokens)\n",
" can_embeddings = can_outputs.last_hidden_state[0]\n",
" #print(\"can_embeddings:\",can_embeddings,can_embeddings.shape)\n",
" \n",
" # Compute cosine similarities\n",
" cosine_similarities = np.zeros((can_embeddings.shape[0], ref_embeddings.shape[0]))\n",
" for i, c in enumerate(can_embeddings):\n",
" for j, r in enumerate(ref_embeddings):\n",
" cosine_similarities[i, j] = cosine_similarity(c, r)\n",
" \n",
"\n",
" # Align cosine similarities\n",
" max_similarities = cosine_similarities.max(axis=1)\n",
"\n",
" # Average similarity scores\n",
" bertscore = max_similarities.mean()\n",
"\n",
" if return_similarity_matrix:\n",
" return bertscore, cosine_similarities\n",
" else:\n",
" return bertscore"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "83aeef57",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"def preprocess_text(text):\n",
" # Lowercase the text\n",
" text = text.lower()\n",
" # Remove punctuation\n",
" text = re.sub(r'[^\\w\\s]', '', text)\n",
" return text"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a14006c3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ref_tokens: {'input_ids': tensor([[3899]]), 'token_type_ids': tensor([[0]]), 'attention_mask': tensor([[1]])}\n",
"can_tokens: {'input_ids': tensor([[29145]]), 'token_type_ids': tensor([[0]]), 'attention_mask': tensor([[1]])}\n",
"0.4610232412815094\n",
"[[0.46102324]]\n",
"(1, 1)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\nicola\\AppData\\Local\\Temp\\ipykernel_20916\\1344219625.py:6: DeprecationWarning: __array__ implementation doesn't accept a copy keyword, so passing copy=False failed. __array__ must implement 'dtype' and 'copy' keyword arguments. To learn more, see the migration guide https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword\n",
" return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))\n"
]
}
],
"source": [
"#just a test\n",
"bertscore, sim_matrix=bert_score(\"dog\", \"zebra\",return_similarity_matrix=True)\n",
"print(bertscore)\n",
"print(sim_matrix)\n",
"print(sim_matrix.shape)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "239aa298",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "cf109d62",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image_url</th>\n",
" <th>alt_text</th>\n",
" <th>mllm_response</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/fd...</td>\n",
" <td>A shot through a window of a wasteland in Pokr...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/72...</td>\n",
" <td>A man in a Caracas market speaks while holding...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>https://ichef.bbci.co.uk/ace/standard/480/cpsp...</td>\n",
" <td>A man stands in a pile of rubble with a damage...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>A collage showing a man in a suit with a newsp...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/2b...</td>\n",
" <td>A composite image of Rupert Grint as an adult,...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>The Global Story, The Global Story, Is there a...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>https://ichef.bbci.co.uk/news/1024/cpsprodpb/3...</td>\n",
" <td>The pink facade of the Saudades art deco-style...</td>\n",
" <td>{'original_alt_text_assessment': '2', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/66...</td>\n",
" <td>Lando Norris speaking to Max Verstappen after ...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/10...</td>\n",
" <td>The Northern Lights snake across the curvature...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/e1...</td>\n",
" <td>A woman cries as she holds her injured child, ...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" image_url \\\n",
"0 https://ichef.bbci.co.uk/news/480/cpsprodpb/fd... \n",
"1 https://ichef.bbci.co.uk/news/480/cpsprodpb/72... \n",
"2 https://ichef.bbci.co.uk/ace/standard/480/cpsp... \n",
"3 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"4 https://ichef.bbci.co.uk/news/480/cpsprodpb/2b... \n",
"5 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"6 https://ichef.bbci.co.uk/news/1024/cpsprodpb/3... \n",
"7 https://ichef.bbci.co.uk/news/480/cpsprodpb/66... \n",
"8 https://ichef.bbci.co.uk/news/480/cpsprodpb/10... \n",
"9 https://ichef.bbci.co.uk/news/480/cpsprodpb/e1... \n",
"\n",
" alt_text \\\n",
"0 A shot through a window of a wasteland in Pokr... \n",
"1 A man in a Caracas market speaks while holding... \n",
"2 A man stands in a pile of rubble with a damage... \n",
"3 A collage showing a man in a suit with a newsp... \n",
"4 A composite image of Rupert Grint as an adult,... \n",
"5 The Global Story, The Global Story, Is there a... \n",
"6 The pink facade of the Saudades art deco-style... \n",
"7 Lando Norris speaking to Max Verstappen after ... \n",
"8 The Northern Lights snake across the curvature... \n",
"9 A woman cries as she holds her injured child, ... \n",
"\n",
" mllm_response \n",
"0 {'original_alt_text_assessment': '4', 'assessm... \n",
"1 {'original_alt_text_assessment': '4', 'assessm... \n",
"2 {'original_alt_text_assessment': '4', 'assessm... \n",
"3 {'original_alt_text_assessment': '4', 'assessm... \n",
"4 {'original_alt_text_assessment': '3', 'assessm... \n",
"5 {'original_alt_text_assessment': '3', 'assessm... \n",
"6 {'original_alt_text_assessment': '2', 'assessm... \n",
"7 {'original_alt_text_assessment': '4', 'assessm... \n",
"8 {'original_alt_text_assessment': '4', 'assessm... \n",
"9 {'original_alt_text_assessment': '4', 'assessm... "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_mllm_response=pd.read_json(\"C:\\cartella_condivisa\\MachineLearning\\HIISlab\\\\accessibility\\\\notebook_miei\\LLM_accessibility_validator\\outputs\\https_www.bbc.com_gemma3-4b_2025_11_23-09_45_08\\mllm_alttext_assessments.json\")\n",
"df_mllm_response"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "42e9a488",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'original_alt_text_assessment': '4',\n",
" 'assessment': 'failure',\n",
" 'evaluation_result': \"The alt-text accurately describes the image content but lacks context. While 'A shot through a window of a wasteland in Pokrovsk' is technically correct, it doesn't convey the significance of the image within the article. The image depicts the devastation caused by the conflict in Pokrovsk, highlighting the ongoing war and the challenges faced by Ukrainian troops. The alt-text should reflect this broader context. Its a descriptive caption, not an informative alt-text.\",\n",
" 'new_alt_text': 'Devastation in Pokrovsk, Ukraine - a city under siege during the ongoing conflict.'}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_mllm_response.iloc[0]['mllm_response']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "15ae0f66",
"metadata": {},
"outputs": [],
"source": [
"def apply_parser_to_dataframe(df, column_name='mllm_response'):\n",
" \"\"\"\n",
" Apply the MLLM response parser to a dataframe column and expand results into separate columns.\n",
" \n",
" Args:\n",
" df (pd.DataFrame): The input dataframe\n",
" column_name (str): Name of the column containing MLLM responses\n",
" \n",
" Returns:\n",
" pd.DataFrame: Dataframe with additional columns for parsed attributes\n",
" \"\"\"\n",
" # Apply the parsing function to each row\n",
" #parsed_results = df[column_name].apply(parse_mllm_alt_text_response) #skipped because already parsed\n",
" parsed_results = df[column_name]\n",
" \n",
" # Convert the list of dictionaries to a DataFrame\n",
" parsed_df = pd.DataFrame(parsed_results.tolist())\n",
" \n",
" # Concatenate with the original dataframe\n",
" result_df = pd.concat([df, parsed_df], axis=1)\n",
" \n",
" return result_df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "2639f30a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image_url</th>\n",
" <th>alt_text</th>\n",
" <th>mllm_response</th>\n",
" <th>original_alt_text_assessment</th>\n",
" <th>assessment</th>\n",
" <th>evaluation_result</th>\n",
" <th>new_alt_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/fd...</td>\n",
" <td>A shot through a window of a wasteland in Pokr...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>failure</td>\n",
" <td>The alt-text accurately describes the image co...</td>\n",
" <td>Devastation in Pokrovsk, Ukraine - a city unde...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/72...</td>\n",
" <td>A man in a Caracas market speaks while holding...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>failure</td>\n",
" <td>The alt-text accurately describes the image co...</td>\n",
" <td>Man in Caracas market speaks about rising food...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>https://ichef.bbci.co.uk/ace/standard/480/cpsp...</td>\n",
" <td>A man stands in a pile of rubble with a damage...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>failure</td>\n",
" <td>The alt-text describes the scene accurately bu...</td>\n",
" <td>Ukraine: Man surveys damage after missile stri...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>A collage showing a man in a suit with a newsp...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>failure</td>\n",
" <td>The alt-text is not appropriate. The image dep...</td>\n",
" <td>Satirical depiction of information overload an...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/2b...</td>\n",
" <td>A composite image of Rupert Grint as an adult,...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" <td>3</td>\n",
" <td>failure</td>\n",
" <td>The alt-text A composite image of Rupert Grin...</td>\n",
" <td>Rupert Grint: Ron Weasley adult and child</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>The Global Story, The Global Story, Is there a...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" <td>3</td>\n",
" <td>failure</td>\n",
" <td>The alt-text is overly verbose and doesn't acc...</td>\n",
" <td>Protest demonstration.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>https://ichef.bbci.co.uk/news/1024/cpsprodpb/3...</td>\n",
" <td>The pink facade of the Saudades art deco-style...</td>\n",
" <td>{'original_alt_text_assessment': '2', 'assessm...</td>\n",
" <td>2</td>\n",
" <td>failure</td>\n",
" <td>The alt-text is inappropriate. The image depic...</td>\n",
" <td>Art Deco building facade in Mumbai, India.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/66...</td>\n",
" <td>Lando Norris speaking to Max Verstappen after ...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>failure</td>\n",
" <td>The alt-text is overly specific and doesn't ac...</td>\n",
" <td>Lando Norris speaking to Max Verstappen after ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/10...</td>\n",
" <td>The Northern Lights snake across the curvature...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>failure</td>\n",
" <td>The alt-text is not appropriate. While it desc...</td>\n",
" <td>NASA astronaut's footage of the Northern Light...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/e1...</td>\n",
" <td>A woman cries as she holds her injured child, ...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>failure</td>\n",
" <td>The alt-text is overly descriptive and verbose...</td>\n",
" <td>Israeli strikes in Gaza a woman holds her cr...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" image_url \\\n",
"0 https://ichef.bbci.co.uk/news/480/cpsprodpb/fd... \n",
"1 https://ichef.bbci.co.uk/news/480/cpsprodpb/72... \n",
"2 https://ichef.bbci.co.uk/ace/standard/480/cpsp... \n",
"3 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"4 https://ichef.bbci.co.uk/news/480/cpsprodpb/2b... \n",
"5 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"6 https://ichef.bbci.co.uk/news/1024/cpsprodpb/3... \n",
"7 https://ichef.bbci.co.uk/news/480/cpsprodpb/66... \n",
"8 https://ichef.bbci.co.uk/news/480/cpsprodpb/10... \n",
"9 https://ichef.bbci.co.uk/news/480/cpsprodpb/e1... \n",
"\n",
" alt_text \\\n",
"0 A shot through a window of a wasteland in Pokr... \n",
"1 A man in a Caracas market speaks while holding... \n",
"2 A man stands in a pile of rubble with a damage... \n",
"3 A collage showing a man in a suit with a newsp... \n",
"4 A composite image of Rupert Grint as an adult,... \n",
"5 The Global Story, The Global Story, Is there a... \n",
"6 The pink facade of the Saudades art deco-style... \n",
"7 Lando Norris speaking to Max Verstappen after ... \n",
"8 The Northern Lights snake across the curvature... \n",
"9 A woman cries as she holds her injured child, ... \n",
"\n",
" mllm_response \\\n",
"0 {'original_alt_text_assessment': '4', 'assessm... \n",
"1 {'original_alt_text_assessment': '4', 'assessm... \n",
"2 {'original_alt_text_assessment': '4', 'assessm... \n",
"3 {'original_alt_text_assessment': '4', 'assessm... \n",
"4 {'original_alt_text_assessment': '3', 'assessm... \n",
"5 {'original_alt_text_assessment': '3', 'assessm... \n",
"6 {'original_alt_text_assessment': '2', 'assessm... \n",
"7 {'original_alt_text_assessment': '4', 'assessm... \n",
"8 {'original_alt_text_assessment': '4', 'assessm... \n",
"9 {'original_alt_text_assessment': '4', 'assessm... \n",
"\n",
" original_alt_text_assessment assessment \\\n",
"0 4 failure \n",
"1 4 failure \n",
"2 4 failure \n",
"3 4 failure \n",
"4 3 failure \n",
"5 3 failure \n",
"6 2 failure \n",
"7 4 failure \n",
"8 4 failure \n",
"9 4 failure \n",
"\n",
" evaluation_result \\\n",
"0 The alt-text accurately describes the image co... \n",
"1 The alt-text accurately describes the image co... \n",
"2 The alt-text describes the scene accurately bu... \n",
"3 The alt-text is not appropriate. The image dep... \n",
"4 The alt-text A composite image of Rupert Grin... \n",
"5 The alt-text is overly verbose and doesn't acc... \n",
"6 The alt-text is inappropriate. The image depic... \n",
"7 The alt-text is overly specific and doesn't ac... \n",
"8 The alt-text is not appropriate. While it desc... \n",
"9 The alt-text is overly descriptive and verbose... \n",
"\n",
" new_alt_text \n",
"0 Devastation in Pokrovsk, Ukraine - a city unde... \n",
"1 Man in Caracas market speaks about rising food... \n",
"2 Ukraine: Man surveys damage after missile stri... \n",
"3 Satirical depiction of information overload an... \n",
"4 Rupert Grint: Ron Weasley adult and child \n",
"5 Protest demonstration. \n",
"6 Art Deco building facade in Mumbai, India. \n",
"7 Lando Norris speaking to Max Verstappen after ... \n",
"8 NASA astronaut's footage of the Northern Light... \n",
"9 Israeli strikes in Gaza a woman holds her cr... "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_mllm_response_parsed=apply_parser_to_dataframe(df_mllm_response)\n",
"df_mllm_response_parsed"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3a69075e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image_url</th>\n",
" <th>alt_text</th>\n",
" <th>mllm_response</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/fd...</td>\n",
" <td>A shot through a window of a wasteland in Pokr...</td>\n",
" <td>{'original_alt_text_assessment': '5', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/72...</td>\n",
" <td>A man in a Caracas market speaks while holding...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>https://ichef.bbci.co.uk/ace/standard/480/cpsp...</td>\n",
" <td>A man stands in a pile of rubble with a damage...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>A collage showing a man in a suit with a newsp...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/2b...</td>\n",
" <td>A composite image of Rupert Grint as an adult,...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>The Global Story, The Global Story, Is there a...</td>\n",
" <td>{'original_alt_text_assessment': '2', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>https://ichef.bbci.co.uk/news/1024/cpsprodpb/3...</td>\n",
" <td>The pink facade of the Saudades art deco-style...</td>\n",
" <td>{'original_alt_text_assessment': '2', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/66...</td>\n",
" <td>Lando Norris speaking to Max Verstappen after ...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/10...</td>\n",
" <td>The Northern Lights snake across the curvature...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/e1...</td>\n",
" <td>A woman cries as she holds her injured child, ...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" image_url \\\n",
"0 https://ichef.bbci.co.uk/news/480/cpsprodpb/fd... \n",
"1 https://ichef.bbci.co.uk/news/480/cpsprodpb/72... \n",
"2 https://ichef.bbci.co.uk/ace/standard/480/cpsp... \n",
"3 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"4 https://ichef.bbci.co.uk/news/480/cpsprodpb/2b... \n",
"5 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"6 https://ichef.bbci.co.uk/news/1024/cpsprodpb/3... \n",
"7 https://ichef.bbci.co.uk/news/480/cpsprodpb/66... \n",
"8 https://ichef.bbci.co.uk/news/480/cpsprodpb/10... \n",
"9 https://ichef.bbci.co.uk/news/480/cpsprodpb/e1... \n",
"\n",
" alt_text \\\n",
"0 A shot through a window of a wasteland in Pokr... \n",
"1 A man in a Caracas market speaks while holding... \n",
"2 A man stands in a pile of rubble with a damage... \n",
"3 A collage showing a man in a suit with a newsp... \n",
"4 A composite image of Rupert Grint as an adult,... \n",
"5 The Global Story, The Global Story, Is there a... \n",
"6 The pink facade of the Saudades art deco-style... \n",
"7 Lando Norris speaking to Max Verstappen after ... \n",
"8 The Northern Lights snake across the curvature... \n",
"9 A woman cries as she holds her injured child, ... \n",
"\n",
" mllm_response \n",
"0 {'original_alt_text_assessment': '5', 'assessm... \n",
"1 {'original_alt_text_assessment': '3', 'assessm... \n",
"2 {'original_alt_text_assessment': '3', 'assessm... \n",
"3 {'original_alt_text_assessment': '3', 'assessm... \n",
"4 {'original_alt_text_assessment': '4', 'assessm... \n",
"5 {'original_alt_text_assessment': '2', 'assessm... \n",
"6 {'original_alt_text_assessment': '2', 'assessm... \n",
"7 {'original_alt_text_assessment': '3', 'assessm... \n",
"8 {'original_alt_text_assessment': '4', 'assessm... \n",
"9 {'original_alt_text_assessment': '4', 'assessm... "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_mllm_response_1=pd.read_json(\"C:\\cartella_condivisa\\MachineLearning\\HIISlab\\\\accessibility\\\\notebook_miei\\LLM_accessibility_validator\\outputs\\https_www.bbc.com_gpt-4o_2025_11_23-09_47_49\\mllm_alttext_assessments.json\")\n",
"df_mllm_response_1"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "158aaea9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image_url</th>\n",
" <th>alt_text</th>\n",
" <th>mllm_response</th>\n",
" <th>original_alt_text_assessment</th>\n",
" <th>assessment</th>\n",
" <th>evaluation_result</th>\n",
" <th>new_alt_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/fd...</td>\n",
" <td>A shot through a window of a wasteland in Pokr...</td>\n",
" <td>{'original_alt_text_assessment': '5', 'assessm...</td>\n",
" <td>5</td>\n",
" <td>success</td>\n",
" <td>The alt-text is appropriate as it provides a d...</td>\n",
" <td>Damaged homes and wasteland in Pokrovsk, Ukrai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/72...</td>\n",
" <td>A man in a Caracas market speaks while holding...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" <td>3</td>\n",
" <td>warning</td>\n",
" <td>The alt-text partially describes the image but...</td>\n",
" <td>Man in Caracas market holding coffee, represen...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>https://ichef.bbci.co.uk/ace/standard/480/cpsp...</td>\n",
" <td>A man stands in a pile of rubble with a damage...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" <td>3</td>\n",
" <td>warning</td>\n",
" <td>The original alt-text provides descriptive det...</td>\n",
" <td>Destruction in Ukraine: rubble, burnt car, and...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>A collage showing a man in a suit with a newsp...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" <td>3</td>\n",
" <td>warning</td>\n",
" <td>The alt-text describes the image but doesn't c...</td>\n",
" <td>Metaphor for restricted words on social media,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/2b...</td>\n",
" <td>A composite image of Rupert Grint as an adult,...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>success</td>\n",
" <td>The alt-text appropriately identifies the imag...</td>\n",
" <td>Rupert Grint as an adult and child, reflecting...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>https://ichef.bbci.co.uk/images/ic/480x270/p0m...</td>\n",
" <td>The Global Story, The Global Story, Is there a...</td>\n",
" <td>{'original_alt_text_assessment': '2', 'assessm...</td>\n",
" <td>2</td>\n",
" <td>failure</td>\n",
" <td>The original alt-text does not directly descri...</td>\n",
" <td>Protesters holding 'Trump Help!' and 'Refuge P...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>https://ichef.bbci.co.uk/news/1024/cpsprodpb/3...</td>\n",
" <td>The pink facade of the Saudades art deco-style...</td>\n",
" <td>{'original_alt_text_assessment': '2', 'assessm...</td>\n",
" <td>2</td>\n",
" <td>failure</td>\n",
" <td>The alt-text is inaccurate and does not match ...</td>\n",
" <td>Yellow art deco-style building in Mumbai's Ban...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/66...</td>\n",
" <td>Lando Norris speaking to Max Verstappen after ...</td>\n",
" <td>{'original_alt_text_assessment': '3', 'assessm...</td>\n",
" <td>3</td>\n",
" <td>warning</td>\n",
" <td>The alt-text describes the image content direc...</td>\n",
" <td>Lando Norris discussing post-race matters afte...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/10...</td>\n",
" <td>The Northern Lights snake across the curvature...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>success</td>\n",
" <td>The alt-text describes the image content effec...</td>\n",
" <td>Northern Lights over Earths curvature filmed ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>https://ichef.bbci.co.uk/news/480/cpsprodpb/e1...</td>\n",
" <td>A woman cries as she holds her injured child, ...</td>\n",
" <td>{'original_alt_text_assessment': '4', 'assessm...</td>\n",
" <td>4</td>\n",
" <td>success</td>\n",
" <td>The alt-text appropriately describes the emoti...</td>\n",
" <td>A crying mother holds her injured child in a G...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" image_url \\\n",
"0 https://ichef.bbci.co.uk/news/480/cpsprodpb/fd... \n",
"1 https://ichef.bbci.co.uk/news/480/cpsprodpb/72... \n",
"2 https://ichef.bbci.co.uk/ace/standard/480/cpsp... \n",
"3 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"4 https://ichef.bbci.co.uk/news/480/cpsprodpb/2b... \n",
"5 https://ichef.bbci.co.uk/images/ic/480x270/p0m... \n",
"6 https://ichef.bbci.co.uk/news/1024/cpsprodpb/3... \n",
"7 https://ichef.bbci.co.uk/news/480/cpsprodpb/66... \n",
"8 https://ichef.bbci.co.uk/news/480/cpsprodpb/10... \n",
"9 https://ichef.bbci.co.uk/news/480/cpsprodpb/e1... \n",
"\n",
" alt_text \\\n",
"0 A shot through a window of a wasteland in Pokr... \n",
"1 A man in a Caracas market speaks while holding... \n",
"2 A man stands in a pile of rubble with a damage... \n",
"3 A collage showing a man in a suit with a newsp... \n",
"4 A composite image of Rupert Grint as an adult,... \n",
"5 The Global Story, The Global Story, Is there a... \n",
"6 The pink facade of the Saudades art deco-style... \n",
"7 Lando Norris speaking to Max Verstappen after ... \n",
"8 The Northern Lights snake across the curvature... \n",
"9 A woman cries as she holds her injured child, ... \n",
"\n",
" mllm_response \\\n",
"0 {'original_alt_text_assessment': '5', 'assessm... \n",
"1 {'original_alt_text_assessment': '3', 'assessm... \n",
"2 {'original_alt_text_assessment': '3', 'assessm... \n",
"3 {'original_alt_text_assessment': '3', 'assessm... \n",
"4 {'original_alt_text_assessment': '4', 'assessm... \n",
"5 {'original_alt_text_assessment': '2', 'assessm... \n",
"6 {'original_alt_text_assessment': '2', 'assessm... \n",
"7 {'original_alt_text_assessment': '3', 'assessm... \n",
"8 {'original_alt_text_assessment': '4', 'assessm... \n",
"9 {'original_alt_text_assessment': '4', 'assessm... \n",
"\n",
" original_alt_text_assessment assessment \\\n",
"0 5 success \n",
"1 3 warning \n",
"2 3 warning \n",
"3 3 warning \n",
"4 4 success \n",
"5 2 failure \n",
"6 2 failure \n",
"7 3 warning \n",
"8 4 success \n",
"9 4 success \n",
"\n",
" evaluation_result \\\n",
"0 The alt-text is appropriate as it provides a d... \n",
"1 The alt-text partially describes the image but... \n",
"2 The original alt-text provides descriptive det... \n",
"3 The alt-text describes the image but doesn't c... \n",
"4 The alt-text appropriately identifies the imag... \n",
"5 The original alt-text does not directly descri... \n",
"6 The alt-text is inaccurate and does not match ... \n",
"7 The alt-text describes the image content direc... \n",
"8 The alt-text describes the image content effec... \n",
"9 The alt-text appropriately describes the emoti... \n",
"\n",
" new_alt_text \n",
"0 Damaged homes and wasteland in Pokrovsk, Ukrai... \n",
"1 Man in Caracas market holding coffee, represen... \n",
"2 Destruction in Ukraine: rubble, burnt car, and... \n",
"3 Metaphor for restricted words on social media,... \n",
"4 Rupert Grint as an adult and child, reflecting... \n",
"5 Protesters holding 'Trump Help!' and 'Refuge P... \n",
"6 Yellow art deco-style building in Mumbai's Ban... \n",
"7 Lando Norris discussing post-race matters afte... \n",
"8 Northern Lights over Earths curvature filmed ... \n",
"9 A crying mother holds her injured child in a G... "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_mllm_response_parsed_1=apply_parser_to_dataframe(df_mllm_response_1)\n",
"df_mllm_response_parsed_1"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3992a33f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('Devastation in Pokrovsk, Ukraine - a city under siege during the ongoing conflict.',\n",
" \"Damaged homes and wasteland in Pokrovsk, Ukraine with smoke rising, highlighting war's impact on the city.\")"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reference=df_mllm_response_parsed[\"new_alt_text\"].iloc[0]\n",
"candidate=df_mllm_response_parsed_1[\"new_alt_text\"].iloc[0]\n",
"reference,candidate"
]
},
{
"cell_type": "markdown",
"id": "a72020fc",
"metadata": {},
"source": [
"## semantic similarity (bertscore)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "c1dad7b8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ref_tokens: {'input_ids': tensor([[25594, 1999, 13433, 21638, 4492, 6711, 1010, 5924, 1011, 1037,\n",
" 2103, 2104, 6859, 2076, 1996, 7552, 4736, 1012]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n",
"can_tokens: {'input_ids': tensor([[ 5591, 5014, 1998, 5949, 3122, 1999, 13433, 21638, 4492, 6711,\n",
" 1010, 5924, 2007, 5610, 4803, 1010, 20655, 2162, 1005, 1055,\n",
" 4254, 2006, 1996, 2103, 1012]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1]])}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\nicola\\AppData\\Local\\Temp\\ipykernel_20916\\1344219625.py:6: DeprecationWarning: __array__ implementation doesn't accept a copy keyword, so passing copy=False failed. __array__ must implement 'dtype' and 'copy' keyword arguments. To learn more, see the migration guide https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword\n",
" return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))\n"
]
},
{
"data": {
"text/plain": [
"np.float64(0.5812176442146302)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bertscore =bert_score(reference, candidate,return_similarity_matrix=False)\n",
"bertscore"
]
},
{
"cell_type": "markdown",
"id": "e901f280",
"metadata": {},
"source": [
"## lexical similarity"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "1c2d1cff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.70703788],\n",
" [0.70703788, 1. ]])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics.pairwise import cosine_similarity as cosine_similarity_sklearn\n",
"\n",
"# Lexical similarity: represent both texts as TF-IDF vectors over character\n",
"# 1- to 3-grams, fitted on just these two texts.\n",
"text = [reference, candidate]\n",
"vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), stop_words=None)\n",
"tfidf_matrix = vectorizer.fit_transform(text)\n",
"\n",
"# Pairwise cosine similarity of the rows; the off-diagonal entry is the\n",
"# reference-vs-candidate score.\n",
"cos_sim_matrix = cosine_similarity_sklearn(tfidf_matrix)\n",
"cos_sim_matrix"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f2304218",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2, 278)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tfidf_matrix.shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d4afb728",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(0.7070378833564678)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cos_sim_lexical = cosine_similarity(tfidf_matrix[0].toarray().flatten(),tfidf_matrix[1].toarray().flatten())\n",
"cos_sim_lexical"
]
},
{
"cell_type": "markdown",
"id": "1bc19425",
"metadata": {},
"source": [
"## semantic similarity embeddings"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "b6ff8518",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2, 768)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sentence_transformers import SentenceTransformer\n",
"model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')\n",
"embeddings = model.encode([reference, candidate],output_value=\"sentence_embedding\")# returns a single embedding per text (mean of all its word embeddings)\n",
" # use \"token_embeddings\" instead to get one embedding per word\n",
"embeddings.shape"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "6310f4b2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"None\n"
]
}
],
"source": [
"# check whether the model uses a default prompt_name to differentiate tasks, as advanced models such as gemma do\n",
"print(model.default_prompt_name )"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "2eb31bbb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.9999995 , 0.82111526],\n",
" [0.82111526, 1. ]], dtype=float32)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cos_sim_matrix_emb = cosine_similarity_sklearn(embeddings)\n",
"cos_sim_matrix_emb"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "93a846e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float32(0.8211156)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cos_sim_emb = cosine_similarity(embeddings[0],embeddings[1])\n",
"cos_sim_emb"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "a7cf3288",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[1.0000, 0.8211],\n",
" [0.8211, 1.0000]]),\n",
" 'cosine')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# USING MODEL SIMILARITY METHOD\n",
"similarities = model.similarity(embeddings, embeddings)\n",
"similarities,model.similarity_fn_name"
]
},
{
"cell_type": "markdown",
"id": "2cfe315e",
"metadata": {},
"source": [
"## sparse encoder"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "a457ad0a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([2, 30522])\n",
"tensor([[1.0000, 0.6181],\n",
" [0.6181, 1.0000]])\n",
"Sparsity: 99.62%\n",
"Avg non-zero dimensions per embedding: 116.50\n"
]
}
],
"source": [
"from sentence_transformers import SparseEncoder\n",
"\n",
"# Sparse (SPLADE) similarity between `reference` and `candidate`.\n",
"# Dedicated `sparse_*` names are used on purpose: the dense section of this\n",
"# notebook already binds `model`, `embeddings` and `similarities`, and\n",
"# rebinding them here would make those cells silently use the sparse\n",
"# objects when they are re-run after this one.\n",
"\n",
"# 1. Load a pretrained SparseEncoder model\n",
"sparse_model = SparseEncoder(\"naver/splade-cocondenser-ensembledistil\")\n",
"sparse_model.similarity_fn_name = \"cosine\"  # default is dot product; switch to cosine\n",
"\n",
"# The sentences to encode\n",
"sentences = [reference, candidate]\n",
"\n",
"# 2. Calculate sparse embeddings by calling encode()\n",
"sparse_embeddings = sparse_model.encode(sentences)\n",
"print(sparse_embeddings.shape)\n",
"# [len(sentences), 30522] - one dimension per vocabulary entry\n",
"\n",
"# 3. Calculate the embedding similarities (cosine, as configured above)\n",
"sparse_similarities = sparse_model.similarity(sparse_embeddings, sparse_embeddings)\n",
"print(sparse_similarities)\n",
"\n",
"# 4. Check sparsity statistics\n",
"stats = SparseEncoder.sparsity(sparse_embeddings)\n",
"print(f\"Sparsity: {stats['sparsity_ratio']:.2%}\")  # Typically >99% zeros\n",
"print(f\"Avg non-zero dimensions per embedding: {stats['active_dims']:.2f}\")"
]
},
{
"cell_type": "markdown",
"id": "326230d3",
"metadata": {},
"source": [
"# 2) Correlation analysis between LLM assessment response pairs"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "37b4d56b",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "75f16536",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([4, 4, 4, 4, 3, 3, 2, 4, 4, 4], [5, 3, 3, 3, 4, 2, 2, 3, 4, 4])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list1=df_mllm_response_parsed[\"original_alt_text_assessment\"].astype(int).tolist()\n",
"list2=df_mllm_response_parsed_1[\"original_alt_text_assessment\"].astype(int).tolist()\n",
"list1,list2"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "fa2837db",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter plot of the two score lists plus a least-squares straight line\n",
"# (degree-1 polynomial fit) evaluated at the distinct values of list1.\n",
"# NOTE(review): the axis labels assume list1 holds the gemma4b scores and\n",
"# list2 the gpt-4o scores - confirm against the cells that build the frames.\n",
"trend_x = np.unique(list1)\n",
"trend_line = np.poly1d(np.polyfit(list1, list2, 1))\n",
"\n",
"plt.scatter(list1, list2)\n",
"plt.plot(trend_x, trend_line(trend_x))\n",
"plt.xlabel(\"gemma4b\")\n",
"plt.ylabel(\"gpt-4o\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "f5576a0a",
"metadata": {},
"source": [
"### Correlation Coefficients"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "51ad4f42",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pearson</th>\n",
" <th>Spearman</th>\n",
" <th>Kendall Tau</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Results</th>\n",
" <td>0.53602</td>\n",
" <td>0.48319</td>\n",
" <td>0.422944</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Pearson Spearman Kendall Tau\n",
"Results 0.53602 0.48319 0.422944"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from scipy.stats import kendalltau, spearmanr\n",
"\n",
"# Agreement between the two score lists: one linear coefficient (Pearson)\n",
"# and two rank-based ones (Spearman, Kendall tau). Only the coefficient of\n",
"# each SciPy result is kept; the second returned value is discarded.\n",
"pearson_correlation = np.corrcoef(list1, list2)[0, 1]\n",
"spearman_correlation, _ = spearmanr(list1, list2)\n",
"kendall_tau_correlation, _ = kendalltau(list1, list2)\n",
"\n",
"# Single-row summary table, one column per coefficient.\n",
"correlation_table = pd.DataFrame(\n",
"    [[pearson_correlation, spearman_correlation, kendall_tau_correlation]],\n",
"    columns=[\"Pearson\", \"Spearman\", \"Kendall Tau\"],\n",
"    index=['Results'],\n",
")\n",
"\n",
"correlation_table"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "accessibility",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}