#### To launch the script
# gradio ui_alt_text.py
# python ui_alt_text.py
import gradio as gr
import requests

# from ..dependences.utils import call_API_urlibrequest
import logging
import time
import json
import urllib.request
import urllib.parse
import os
import sqlite3

WCAG_VALIDATOR_RESTSERVER_HEADERS = [("Content-Type", "application/json")]

url_list = [
    "https://amazon.com",
    "https://web.archive.org/web/20251126051721/https://www.amazon.com/",
    "https://web.archive.org/web/20230630235957/http://www.amazon.com/",
    "https://ebay.com",
    "https://walmart.com",
    "https://etsy.com",
    "https://target.com",
    "https://wayfair.com",
    "https://bestbuy.com",
    "https://macys.com",
    "https://homedepot.com",
    "https://costco.com",
    "https://www.ansa.it",
    "https://en.wikipedia.org/wiki/Main_Page",
    "https://www.lanazione.it",
    "https://www.bbc.com",
    "https://www.cnn.com",
    "https://www.nytimes.com",
    "https://www.theguardian.com",
]


# ------ TODO use from utils instead of redefining here
def call_API_urlibrequest(
    data={},
    verbose=False,
    url="",
    headers=[],
    method="post",
    base=2,  # number of seconds to wait
    max_tries=3,
):
    if verbose:
        logging.info("input_data:%s", data)

    # Allow multiple attempts to call the API in case of downtime.
    # Return the last captured response to the caller after 3 failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]
    for num_tries in range(max_tries):
        try:
            if method == "get":
                # Encode the parameters and append them to the URL
                query_string = urllib.parse.urlencode(data)
                url_with_params = f"{url}?{query_string}"
                request = urllib.request.Request(url_with_params, method="GET")
                for ele in headers:
                    request.add_header(ele[0], ele[1])
            elif method == "post":
                # Convert the dictionary to a JSON formatted string and encode it to bytes
                data_to_send = json.dumps(data).encode("utf-8")
                request = urllib.request.Request(url, data=data_to_send, method="POST")
                for ele in headers:
                    request.add_header(ele[0], ele[1])
            else:
                return {"error_message": "method_not_allowed"}

            # Send the request and capture the response
            with urllib.request.urlopen(request) as response:
                # Read and decode the response
                response_json = json.loads(response.read().decode("utf-8"))
                logging.info("response_json:%s", response_json)
                logging.info("response.status_code:%s", response.getcode())
                return response_json
        except Exception as e:
            logging.error("error message:%s", e)
            response_json = {"error": str(e)}
            logging.info("num_tries:%s", num_tries)
            logging.info(
                "Waiting %s seconds before automatically trying again.",
                str(wait_seconds[num_tries]),
            )
            time.sleep(wait_seconds[num_tries])

    logging.info(
        "Tried %s times to make API call to get a valid response object", max_tries
    )
    logging.info("Returning provided response")
    return response_json


def create_folder(root_path, directory_separator, next_path):
    output_dir = root_path + directory_separator + next_path
    try:
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
    except Exception as e:
        logging.error("exception while creating folder %s:%s", output_dir, e)
        exit(1)
    return output_dir


def db_persistence_startup(
    db_name_and_path="persistence/wcag_validator.db",
    table="wcag_validator_results",
):
    try:
        _ = create_folder(
            root_path=os.getcwd(),
            directory_separator="/",
            next_path="persistence",
        )
    except Exception as e:
        logging.error("exception on db persistence startup:%s", e)
        exit(1)

    try:
        db_connection = sqlite3.connect(db_name_and_path)
        cursor = db_connection.cursor()
        # Create a table to store JSON data
        cursor.execute(
            """CREATE TABLE IF NOT EXISTS """
            + table
            + """ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                insertion_time TIMESTAMP
                DEFAULT CURRENT_TIMESTAMP,
                insert_type TEXT,
                json_input_data TEXT,
                json_output_data TEXT
            )"""
        )
        db_connection.commit()
        logging.info("connection to the database established")
        return db_connection
    except Exception as e:
        logging.error("db_management problem:%s", e)
        exit(1)


def db_persistence_insert(
    connection_db,
    insert_type,
    json_in_str,
    json_out_str,
    table="wcag_validator_results",
):
    try:
        cursor = connection_db.cursor()
        # Insert JSON data into the table along with the current timestamp
        cursor.execute(
            "INSERT INTO "
            + table
            + " (insert_type,json_input_data,json_output_data) VALUES (?,?,?)",
            (insert_type, json_in_str, json_out_str),
        )
        connection_db.commit()
        logging.info(
            "Data correctly saved on local db table:%s, insertion type:%s",
            table,
            insert_type,
        )
    except Exception as e:
        logging.error("exception %s", e)


# ------- End TODO use from utils instead of redefining here


# Method 1: Embed external website (works only for sites that allow iframes)
def create_iframe(url):
    # NOTE: the original markup was lost; this is a minimal iframe embed.
    iframe_html = (
        f'<iframe src="{url}" width="100%" height="600" frameborder="0"></iframe>'
    )
    return iframe_html


def load_images_from_json(json_input):
    """Extract URLs and alt text from JSON and create an HTML gallery"""
    try:
        data = json_input

        if "images" not in data or not data["images"]:
            return "No images found in JSON", ""

        images = data["images"]
        info_text = f"Found {len(images)} image(s)\n"
        print(f"Found {len(data['images'])} image(s)")

        # Create HTML gallery with checkboxes and assessment forms.
        # NOTE: the original template was lost; this minimal markup keeps the
        # CSS classes and data-* attributes the JavaScript handler below relies
        # on (.image-checkbox, .original-alt, .assessment-range, .new-alt-text).
        # The per-image field names "image_url" and "alt_text" are assumed.
        html = '<div style="display:flex;flex-wrap:wrap;gap:16px;">'
        for idx, img in enumerate(images):
            img_url = img.get("image_url", "")
            alt_text = img.get("alt_text", "")
            html += f"""
            <div style="border:1px solid #ccc;padding:8px;width:260px;">
                <img src="{img_url}" alt="{alt_text}" style="max-width:100%;">
                <label>
                    <input type="checkbox" class="image-checkbox"
                           data-index="{idx}" data-imgurl="{img_url}"> Select
                </label>
                <textarea class="original-alt" data-index="{idx}" readonly>{alt_text}</textarea>
                <input type="range" class="assessment-range" data-index="{idx}"
                       min="1" max="5" value="3">
                <textarea class="new-alt-text" data-index="{idx}"
                          placeholder="Proposed alt text"></textarea>
            </div>
            """
        html += "</div>"

        return info_text, html

    except json.JSONDecodeError as e:
        return f"Error: Invalid JSON format - {str(e)}", ""
    except Exception as e:
        return f"Error: {str(e)}", ""


def load_llm_assessment_from_json(json_input):
    try:
        # Parse JSON input
        data = json_input

        if "mllm_validations" not in data or not data["mllm_validations"]:
            print("no mllm_validations found")
            return "No mllm_validations found in JSON"

        info_text = f"Assessment done on {len(data['mllm_validations']['mllm_alttext_assessments'])} image(s)\n\n"
        print(
            f"Assessment done on {len(data['mllm_validations']['mllm_alttext_assessments'])} image(s)"
        )

        for idx, img_data in enumerate(
            data["mllm_validations"]["mllm_alttext_assessments"], 1
        ):
            original_alt_text_assessment = img_data["mllm_response"].get(
                "original_alt_text_assessment", "No description"
            )
            new_alt_text = img_data["mllm_response"].get(
                "new_alt_text", "No description"
            )
            alt_text_original = img_data.get("alt_text", "No alt_text provided")
            info_text += (
                f"✓ alt_text original: {alt_text_original}. "
                f"LLM assessment: {original_alt_text_assessment} => "
                f"LLM proposed alt_text: {new_alt_text}\n"
            )

        return info_text

    except json.JSONDecodeError as e:
        return f"Error: Invalid JSON format - {str(e)}"
    except Exception as e:
        return f"Error: {str(e)}"


def make_alttext_llm_assessment_api_call(
    url, selected_images_json=[], number_of_images=30
):
    print(f"Making API call to {url}")
    selected_images = json.loads(selected_images_json) if selected_images_json else []
    print("selected_images:", selected_images)
    if not selected_images or len(selected_images) == 0:
        info_text = "No images selected"
        return info_text

    selected_urls = []
    for img in selected_images:
        selected_urls.append(img["image_url"])

    try:
        response = call_API_urlibrequest(
            data={
                "page_url": url,
                "number_of_images": number_of_images,
                "context_levels": 5,
                "pixel_distance_threshold": 200,
                "save_images": "True",
                "save_elaboration": "True",
                "specific_images_urls": selected_urls,
            },
            url="http://localhost:8000/wcag_alttext_validation",
            headers=WCAG_VALIDATOR_RESTSERVER_HEADERS,
        )
        # return response
        info_text = load_llm_assessment_from_json(response)
        return info_text
    except Exception as e:
        return f"Error: {str(e)}"


def make_image_extraction_api_call(url, number_of_images=30):
    print(f"Making API call to {url}")
    try:
        response = call_API_urlibrequest(
            data={
                "page_url": url,
                "number_of_images": number_of_images,
            },
            url="http://localhost:8000/extract_images",
            headers=WCAG_VALIDATOR_RESTSERVER_HEADERS,
        )
        # return response
        info_text, gallery_images = load_images_from_json(response)
        return info_text, gallery_images
    except Exception as e:
        # The Gradio callback expects two outputs (info textbox, gallery HTML)
        return f"Error: {str(e)}", ""


# ------- Gradio Interface -------#

# Global variable to hold the database connection
connection_db = db_persistence_startup(table="wcag_user_assessments")

# Create Gradio interface
with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo:
    # Use the global connection_db reference
    print("Database connection reference available globally")

    gr.Markdown("# WCAG AI Validator UI")

    with gr.Tab("Alt Text Assessment"):
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    with gr.Column():
                        url_input = gr.Dropdown(
                            url_list,
                            value=url_list[0],
                            multiselect=False,
                            label="Select a URL",
                            info="Select a URL to load in the iframe",
                        )
                    with gr.Column():
                        image_extraction_api_call_btn = gr.Button(
                            "Extract Images & Alt Text", variant="primary"
                        )
                        alttext_api_call_btn = gr.Button(
                            "Alt Text LLM Assessment",
                            variant="secondary",
                            interactive=False,
                        )

                with gr.Row():
                    image_info_output = gr.Textbox(label="Original alt-text", lines=5)
                    alttext_info_output = gr.Textbox(label="LLM Assessment", lines=5)

                with gr.Row():
                    gallery_html = gr.HTML(label="Image Gallery")

        # Clear previous results, run the extraction, then re-enable the
        # assessment button.
        image_extraction_api_call_btn.click(
            fn=lambda: ("", "", "", gr.Button(interactive=False)),
            inputs=[],
            outputs=[
                image_info_output,
                gallery_html,
                alttext_info_output,
                alttext_api_call_btn,
            ],
        ).then(
            make_image_extraction_api_call,
            inputs=[url_input],
            outputs=[image_info_output, gallery_html],
        ).then(
            fn=lambda: gr.Button(interactive=True),
            inputs=[],
            outputs=[alttext_api_call_btn],
        )

        # Process selected images with JavaScript
        alttext_api_call_btn.click(
            fn=make_alttext_llm_assessment_api_call,
            inputs=[url_input, gallery_html],
            outputs=[alttext_info_output],
            js="""
            (url_input, gallery_html) => {
                const checkboxes = document.querySelectorAll('.image-checkbox:checked');
                if (checkboxes.length === 0) {
                    alert('Please select at least one image!');
                    return [url_input, JSON.stringify([])];
                }
                if (checkboxes.length > 3) {
                    alert('Please select maximum 3 images!');
                    return [url_input, JSON.stringify([])];
                }

                const selectedData = [];
                checkboxes.forEach(checkbox => {
                    const index = checkbox.dataset.index;
                    const imageUrl = checkbox.dataset.imgurl;
                    const originalAlt = document.querySelector('.original-alt[data-index="' + index + '"]').value;
                    const assessment = document.querySelector('.assessment-range[data-index="' + index + '"]').value;
                    const newAltText = document.querySelector('.new-alt-text[data-index="' + index + '"]').value;
                    selectedData.push({
                        image_url: imageUrl,
                        original_alt_text: originalAlt,
                        assessment: parseInt(assessment),
                        new_alt_text: newAltText
                    });
                });

                return [url_input, JSON.stringify(selectedData)];
            }
            """,
        )


if __name__ == "__main__":
    # connection_db = db_persistence_startup(table="wcag_user_assessments")
    demo.launch()
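
# ---------------------------------------------------------------------------
# Illustrative response shapes (an assumption inferred from the parsing
# helpers above, not the REST server's documented contract). Field names such
# as "image_url" may differ in the actual /extract_images response.
#
# /extract_images           -> parsed by load_images_from_json:
#   {"images": [{"image_url": "https://example.com/a.png", "alt_text": "..."}]}
#
# /wcag_alttext_validation  -> parsed by load_llm_assessment_from_json:
#   {"mllm_validations": {"mllm_alttext_assessments": [
#       {"alt_text": "...",
#        "mllm_response": {"original_alt_text_assessment": "...",
#                          "new_alt_text": "..."}}]}}
# ---------------------------------------------------------------------------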