llm assessment output parsed

This commit is contained in:
Nicola Leonardi 2025-11-23 12:18:26 +01:00
parent 77162be166
commit e53ac19298
6 changed files with 1535 additions and 19 deletions

View File

@ -1,6 +1,6 @@
# WCGA AI validator
- Install the required dependencies (inside the docker folder)
- Install the required dependencies
```
pip install -r requirements.txt
@ -8,19 +8,21 @@ pip install -r requirements.txt
# .env variable
mllm_end_point_openai='https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2025-01-01-preview'
mllm_api_key_openai=
mllm_model_id_openai='gpt-4o'
* mllm_end_point_openai='https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2025-01-01-preview'
* mllm_api_key_openai=
* mllm_model_id_openai='gpt-4o'
mllm_end_point_local='https://vgpu.hiis.cloud.isti.cnr.it/api/chat'
mllm_api_key_local=
#mllm_model_id_local='gemma3:12b'
mllm_model_id_local='gemma3:4b'
* mllm_end_point_local='https://vgpu.hiis.cloud.isti.cnr.it/api/chat'
* mllm_api_key_local=
* #mllm_model_id_local='gemma3:12b'
* mllm_model_id_local='gemma3:4b'
use_openai_model='False' # set to 'True' to use the OpenAI deployment, 'False' to use the local model
## For the CLI version use:
wcag_validator.py
python wcag_validator.py
## For the restService use:
python wcag_validator_RESTserver.py
## For the RESTservice use:
python wcag_validator_RESTserver.py
## The scripts folder contains some elaboration scripts. They require a dedicated requirements file

View File

@ -1,5 +1,6 @@
from dependences.utils import call_API_urlibrequest, encode_image_from_url
import json
import re
class MLLMManager:
def __init__(self, end_point, api_key, model_id):
@ -228,3 +229,79 @@ class MLLMManager:
}
mllm_responses.append(report)
return mllm_responses
def parse_mllm_alt_text_response(mllm_response):
    """
    Parse an MLLM response string and extract key attributes into a dict.

    Accepts raw MLLM output such as:
        ```json\n{\n"Original alt-text assessment"... etc
    and returns a structured dictionary.

    Args:
        mllm_response (str): The raw MLLM response text containing JSON data.

    Returns:
        dict: Always a dict with the keys
            "original_alt_text_assessment", "assessment",
            "evaluation_result", "new_alt_text".
            All values are None when the input is empty or unparsable;
            missing JSON fields default to "".
    """
    # Single canonical "nothing extracted" result (was duplicated four times).
    empty_result = {
        "original_alt_text_assessment": None,
        "assessment": None,
        "evaluation_result": None,
        "new_alt_text": None,
    }
    # Handle None or empty-string input up front.
    if mllm_response is None or mllm_response == "":
        return empty_result
    try:
        # Prefer JSON wrapped in ```json ... ``` markdown fences.
        match = re.search(r'```json\s*(.*?)\s*```', mllm_response, re.DOTALL)
        if match:
            json_str = match.group(1)
        else:
            # Fall back to the first brace-delimited span.
            # BUGFIX: group selection is now tied to the regex that actually
            # matched. The old code picked group(1) whenever '```json' appeared
            # anywhere in the text, so an unterminated fence raised IndexError
            # (swallowed by the broad except) instead of parsing the JSON.
            match = re.search(r'\{.*\}', mllm_response, re.DOTALL)
            if not match:
                return empty_result
            json_str = match.group(0)
        parsed_data = json.loads(json_str)
        # Map the MLLM's verbose keys onto stable snake_case attributes.
        return {
            "original_alt_text_assessment": parsed_data.get("Original alt-text assessment", ""),
            "assessment": parsed_data.get("Assessment", ""),
            "evaluation_result": parsed_data.get("EvaluationResult", ""),
            "new_alt_text": parsed_data.get("New alt-text", ""),
        }
    except json.JSONDecodeError as e:
        print(f"JSON parsing error: {e}")
        return empty_result
    except Exception as e:
        # Catch-all kept deliberately: callers expect a dict, never an exception
        # (e.g. non-string input like NaN ends up here via re.search TypeError).
        print(f"Error parsing MLLM response: {e}")
        return empty_result

View File

@ -13,7 +13,7 @@ from dependences.utils import (
db_persistence_insert,
)
from dependences.image_extractor import ImageExtractor
from dependences.mllm_management import MLLMManager
from dependences.mllm_management import MLLMManager, parse_mllm_alt_text_response
invalid_json_input_msg = "Invalid JSON format"
unexpected_error_msg = "Unexpected Error: could not end the process"
@ -105,14 +105,25 @@ class WCAGAltTextValuationRoutes:
# Create MLLM manager
mllm_manager = MLLMManager(mllm_end_point, mllm_api_key, mllm_model_id)
logging.info("mllm_manager.end_point:%s", mllm_manager.end_point)
# Make alt text evaluation
mllm_responses = mllm_manager.make_alt_text_evaluation(
images,
openai_model=self.mllm_settings["openai_model"],
)
# Parse MLLM responses
for i, response in enumerate(mllm_responses):
parsed_resp = parse_mllm_alt_text_response(response["mllm_response"])
mllm_responses[i]["mllm_response"] = parsed_resp
mllm_responses_object = {"mllm_model_id": mllm_model_id, "mllm_responses": mllm_responses}
returned_object = {"images": images, "mllm_validations": mllm_responses_object}
mllm_responses_object = {
"mllm_model_id": mllm_model_id,
"mllm_alttext_assessments": mllm_responses,
}
returned_object = {
"images": images,
"mllm_validations": mllm_responses_object,
}
try:
# Persist to local db
@ -140,7 +151,7 @@ class WCAGAltTextValuationRoutes:
# save mllm responses
with open(
output_dir + "/mllm_responses.json", "w", encoding="utf-8"
output_dir + "/mllm_alttext_assessments.json", "w", encoding="utf-8"
) as f:
json.dump(mllm_responses, f, indent=2, ensure_ascii=False)

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,8 @@
pandas==2.3.3
scipy==1.15.3
torch==2.9.1
transformers==4.57.1
numpy==2.2.6
matplotlib==3.10.7
scikit-learn==1.7.2
sentence-transformers==5.1.2

View File

@ -19,7 +19,7 @@ warnings.filterwarnings("ignore")
exception_msg = "Exception: %s"
from dependences.image_extractor import ImageExtractor
from dependences.mllm_management import MLLMManager
from dependences.mllm_management import MLLMManager, parse_mllm_alt_text_response
async def cli(sys_argv):
@ -158,10 +158,15 @@ async def cli(sys_argv):
openai_model=args.openai_model,
)
# Parse MLLM responses
for i, response in enumerate(mllm_responses):
parsed_resp = parse_mllm_alt_text_response(response["mllm_response"])
mllm_responses[i]["mllm_response"] = parsed_resp
# save mllm responses
if disclaim_bool_string(args.save_elaboration) == True: # Optionally save to JSON
with open(output_dir + "/mllm_responses.json", "w", encoding="utf-8") as f:
with open(output_dir + "/mllm_alttext_assessments.json", "w", encoding="utf-8") as f:
json.dump(mllm_responses, f, indent=2, ensure_ascii=False)
print("--- Second task completed: MLLM management \n")