from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse
import logging
from pydantic import BaseModel
import json
from typing import Dict, List
from datetime import datetime, timezone
import aiofiles
import asyncio
from dependences.utils import (
    disclaim_bool_string,
    prepare_output_folder,
    prepare_folder_path,
    create_folder,
    db_persistence_insert,
)
from dependences.image_extractor import ImageExtractor
from dependences.mllm_management import MLLMManager, parse_mllm_alt_text_response

invalid_json_input_msg = "Invalid JSON format"
unexpected_error_msg = "Unexpected Error: could not end the process"


class WCAGAltTextValuation(BaseModel):
    page_url: str = "https://www.bbc.com"
    context_levels: int = 5
    pixel_distance_threshold: int = 200
    number_of_images: int = 10
    save_images: str = "True"
    save_elaboration: str = "True"
    specific_images_urls: List[str] = []


class WCAGAltTextValuationRoutes:
    def __init__(self, connection_db, mllm_settings):
        self.connection_db = connection_db
        self.mllm_settings = mllm_settings
        self.router = APIRouter()
        self.router.add_api_route(
            "/wcag_alttext_validation",
            self.wcag_alttext_validation,
            methods=["POST"],
            tags=["Wcag Alt Text Validation"],
            description="WCAG validator alt_text validation",
            name="wcag alttext validation",
            dependencies=[],
        )
        logging.info("wcag alttext routes correctly initialized.")

    async def wcag_alttext_validation(
        self, request: Request, data: WCAGAltTextValuation
    ) -> JSONResponse:
        """Return the alt text validation assessment based on WCAG guidelines."""
        try:
            print("Received wcag alttext validation request.")
            json_content = json.loads(data.model_dump_json())

            if self.mllm_settings["openai_model"] == "Both":
                mllm_model_id_for_logging = (
                    self.mllm_settings["mllm_model_id"]["model_id_remote"]
                    + "&"
                    + self.mllm_settings["mllm_model_id"]["model_id_local"]
                )
            else:
                mllm_model_id_for_logging = self.mllm_settings["mllm_model_id"]

            # --- prepare output folders if needed ---
            # Default both paths to None so they are always defined, even when
            # nothing has to be saved.
            output_dir = None
            images_output_dir = None
            if (
                disclaim_bool_string(json_content["save_elaboration"]) == True
                or disclaim_bool_string(json_content["save_images"]) == True
            ):  # if there is something to save
                url_path, folder_str = prepare_folder_path(
                    json_content, mllm_model_id_for_logging, tecnhnique_name="g94"
                )
                output_dir = prepare_output_folder(url_path, folder_str)
                if disclaim_bool_string(json_content["save_images"]) == True:
                    images_output_dir = create_folder(
                        output_dir, directory_separator="/", next_path="images"
                    )
                    print("save images path:", images_output_dir)
            # ----------------------------------------

            # Create extractor
            image_extractor = ImageExtractor(
                json_content["page_url"],
                context_levels=json_content["context_levels"],
                pixel_distance_threshold=json_content["pixel_distance_threshold"],
                number_of_images=json_content["number_of_images"],
                save_images=json_content["save_images"],
                save_images_path=images_output_dir,
            )

            # Extract images
            logging.info(f"Extracting images from: {json_content['page_url']}")
            images = await image_extractor.extract_images(
                specific_images_urls=json_content["specific_images_urls"],
                extract_context=True,
            )

            if self.mllm_settings["openai_model"] == "Both":
                from concurrent.futures import ThreadPoolExecutor

                def run_model_evaluation(
                    endpoint, api_key, model_id, openai_model, label
                ):
                    manager = MLLMManager(endpoint, api_key, model_id)
                    print(
                        f"Using {label} model for alt text evaluation.",
                        manager.end_point,
                    )
                    logging.info("mllm_end_point:%s", endpoint)
                    logging.info("mllm_model_id:%s", model_id)
                    responses = manager.make_alt_text_evaluation(
                        images, openai_model=openai_model
                    )
                    for i, response in enumerate(responses):
                        responses[i]["mllm_response"] = parse_mllm_alt_text_response(
                            response["mllm_response"], model_id=model_id
                        )
                    return responses

                # Run the remote and local evaluations concurrently
                with ThreadPoolExecutor(max_workers=2) as executor:
                    future_openai = executor.submit(
                        run_model_evaluation,
                        self.mllm_settings["mllm_end_point"]["model_end_point_remote"],
                        self.mllm_settings["mllm_api_key"]["api_key_remote"],
                        self.mllm_settings["mllm_model_id"]["model_id_remote"],
                        True,
                        "first remote",
                    )
                    future_local = executor.submit(
                        run_model_evaluation,
                        self.mllm_settings["mllm_end_point"]["model_end_point_local"],
                        self.mllm_settings["mllm_api_key"]["api_key_local"],
                        self.mllm_settings["mllm_model_id"]["model_id_local"],
                        False,
                        "second local",
                    )
                    mllm_responses_openai = future_openai.result()
                    mllm_responses_local = future_local.result()

                mllm_responses_object = {
                    "mllm_alttext_assessments": {
                        "mllm_alttext_assessments_openai": mllm_responses_openai,
                        "mllm_alttext_assessments_local": mllm_responses_local,
                    }
                }
            else:
                # MLLM settings
                mllm_end_point = self.mllm_settings["mllm_end_point"]
                mllm_api_key = self.mllm_settings["mllm_api_key"]
                mllm_model_id = self.mllm_settings["mllm_model_id"]

                # Create MLLM manager
                mllm_manager = MLLMManager(mllm_end_point, mllm_api_key, mllm_model_id)
                print(
                    "Using single model for alt text evaluation.",
                    mllm_manager.end_point,
                )
                logging.info("mllm_end_point:%s", mllm_end_point)
                logging.info("mllm_model_id:%s", mllm_model_id)

                # Make alt text evaluation
                mllm_responses = mllm_manager.make_alt_text_evaluation(
                    images,
                    openai_model=self.mllm_settings["openai_model"],
                )

                # Parse MLLM responses
                for i, response in enumerate(mllm_responses):
                    parsed_resp = parse_mllm_alt_text_response(
                        response["mllm_response"], model_id=mllm_model_id
                    )
                    mllm_responses[i]["mllm_response"] = parsed_resp

                mllm_responses_object = {
                    "mllm_alttext_assessments": mllm_responses,
                }

            # Common: prepare the object to return in the response
            returned_object = {
                "images": images,
                "mllm_validations": mllm_responses_object,
            }

            try:
                # Persist to the local db: convert JSON data to strings first
                json_in_str = json.dumps(images, ensure_ascii=False)
                json_out_str = json.dumps(mllm_responses_object, ensure_ascii=False)
                db_persistence_insert(
                    connection_db=self.connection_db,
                    insert_type="wcag_alttext_validation",
                    page_url=json_content["page_url"],
                    llm_model=mllm_model_id_for_logging,
                    json_in_str=json_in_str,
                    json_out_str=json_out_str,
                    table="wcag_validator_results",
                )
            except Exception as e:
                logging.error("error persisting to local db: %s", e)

            # Optionally save the elaboration to JSON
            if disclaim_bool_string(json_content["save_elaboration"]) == True:
                # Save the extracted images info into a dedicated JSON file
                await image_extractor.save_elaboration(
                    images, output_dir=output_dir + "/extracted_images.json"
                )

                # Save MLLM responses
                # Previous synchronous version:
                # with open(
                #     output_dir + "/mllm_alttext_assessments.json", "w", encoding="utf-8"
                # ) as f:
                #     # json.dump(mllm_responses, f, indent=2, ensure_ascii=False)
                #     # (this was the call used in the first user-test version)
                #     json.dump(mllm_responses_object, f, indent=2, ensure_ascii=False)

                # Async version
                async with aiofiles.open(
                    output_dir + "/mllm_alttext_assessments.json",
                    "w",
                    encoding="utf-8",
                ) as f:
                    await f.write(
                        json.dumps(mllm_responses_object, indent=2, ensure_ascii=False)
                    )

            return JSONResponse(content=returned_object, status_code=200)

        except json.JSONDecodeError:
            logging.error(invalid_json_input_msg)
            return JSONResponse(
                content={"error": invalid_json_input_msg}, status_code=400
            )
        except Exception as e:
            logging.error(unexpected_error_msg + " %s", e)
            return JSONResponse(
                content={"error": unexpected_error_msg}, status_code=500
            )
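

# ---------------------------------------------------------------------------
# Illustrative usage sketch (an assumption, not part of the original module):
# how these routes could be mounted on a FastAPI application. `connection_db`
# is a hypothetical placeholder, and the exact shape of `mllm_settings` depends
# on the deployment; the keys below only mirror the ones read in this file
# ("openai_model", "mllm_end_point", "mllm_api_key", "mllm_model_id"). When
# "openai_model" is "Both", the endpoint/api-key/model-id entries are expected
# to be dicts with *_remote and *_local keys instead of plain strings.
#
#   from fastapi import FastAPI
#
#   app = FastAPI()
#   mllm_settings = {
#       "openai_model": True,  # passed through to make_alt_text_evaluation; "Both" runs remote + local
#       "mllm_end_point": "https://mllm.example.com/v1",  # hypothetical endpoint
#       "mllm_api_key": "YOUR_API_KEY",
#       "mllm_model_id": "some-multimodal-model",         # hypothetical model id
#   }
#   alttext_routes = WCAGAltTextValuationRoutes(connection_db, mllm_settings)
#   app.include_router(alttext_routes.router)
# ---------------------------------------------------------------------------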