#### To launch the script
# gradio wcag_validator_ui.py
# python wcag_validator_ui.py
import gradio as gr
from gradio_modal import Modal
import requests
from pathlib import Path
import sys
import pandas as pd
parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(parent_dir))
from dotenv import load_dotenv, find_dotenv
from dependences.utils import (
call_API_urlibrequest,
create_folder,
db_persistence_startup,
db_persistence_insert,
return_from_env_valid,
)
from dependences_ui.utils import *
import logging
import time
import json
import urllib.request
import urllib.parse
import os
import sqlite3
from user_task_assignment.user_assignment_manager import UserAssignmentManager
from dependences_ui.utils import load_users,get_user_assessments_done
def user_assigment_management():
    """Create the user-assignment manager and align it with the loaded users.

    The users are read with ``load_users()`` and a ``UserAssignmentManager``
    is created on the fixed database, config and output paths below. When the
    number of users the manager reports differs from the number of loaded
    users, the loaded user names are registered again and the count is
    re-read.

    Returns:
        The ``UserAssignmentManager`` instance.

    Raises:
        SystemExit: with status 1 when the two counts still differ after the
            alignment attempt.
    """
    users = load_users()
    user_list = list(users.keys())
    print(f"Loaded users from simple JSON: {len(user_list)}")

    user_assignment_manager = UserAssignmentManager(
        db_path="persistence/wcag_validator_ui.db",
        config_json_path="user_task_assignment/sites_config.json",
        assignments_json_path="user_task_assignment/alt_text_assignments_output_target_overlap.json",
        assignments_xlsx_path="user_task_assignment/alt_text_assignments_output_target_overlap.xlsx",
    )

    # Users currently known to the manager.
    managed_users_number = user_assignment_manager.get_managed_user_count()
    print(f"Currently managed users from db: {managed_users_number}")

    # Re-register only when the counts disagree, so a normal start-up does not
    # touch the assignment files (per the original author's note, registering
    # also regenerates the assignment output files).
    if managed_users_number != len(user_list):
        print(f"Warning: Number of users in db ({managed_users_number}) does not match number of users loaded from JSON ({len(user_list)}). Re-init user assignments files.")
        user_assignment_manager.register_active_users(user_list)
        # Re-read the count after the registration alignment.
        managed_users_number = user_assignment_manager.get_managed_user_count()
        print(f"Currently managed users from db after alignment: {managed_users_number}")

    print(f"Total managed users from db: {managed_users_number}\n")

    if managed_users_number != len(user_list):
        print(f"Warning: Number of users in db ({managed_users_number}) does not match number of users loaded from JSON ({len(user_list)}). Check user assignment manager initialization.")
        # sys.exit is always available; the bare ``exit`` helper is installed
        # by the ``site`` module and is meant for interactive use.
        sys.exit(1)

    user_assignment_stats = user_assignment_manager.get_statistics()
    print(f"Current assignment stats:{user_assignment_stats} \n")
    return user_assignment_manager
# Header pairs passed as `headers=` to the REST calls made with
# call_API_urlibrequest in this module; declares a JSON request body.
WCAG_VALIDATOR_RESTSERVER_HEADERS = [("Content-Type", "application/json")]
def maybe_close_modal(process_dataframe_output_state):
    """Return a ``Modal`` update that hides the modal only for a truthy state.

    A falsy ``process_dataframe_output_state`` keeps the modal visible.
    """
    print("Checking if modal can be closed based on:", type(process_dataframe_output_state), process_dataframe_output_state)
    can_close = bool(process_dataframe_output_state)
    if can_close:
        # close it
        return Modal(visible=False)
    print("Modal cannot be closed.")
    # keep it open
    return Modal(visible=True)
def maybe_open_modal(make_alttext_llm_assessment_api_call_output_state):
    """Return a ``Modal`` update that shows the modal only for a truthy state.

    A falsy ``make_alttext_llm_assessment_api_call_output_state`` keeps the
    modal hidden.
    """
    print("Checking if modal can be opened based on:", type(make_alttext_llm_assessment_api_call_output_state), make_alttext_llm_assessment_api_call_output_state)
    can_open = bool(make_alttext_llm_assessment_api_call_output_state)
    if can_open:
        return Modal(visible=True)
    print("Modal cannot be opened.")
    return Modal(visible=False)
def render_user_assessmnet_status_table(df):
    """Render the per-site assignment status as an HTML table.

    Args:
        df: DataFrame with the columns ``"Website URL"``,
            ``"Assigned Image Number"`` and ``"Work Done on Image Number"``
            (the last two are iterated / used for membership tests), or
            ``None``.

    Returns:
        An HTML string. For ``None`` or an empty frame it is a short
        "No assignments found." paragraph; otherwise a table with one row per
        DataFrame row plus a closing paragraph that contains a congratulation
        message when no assigned image is left to do.

    NOTE(review): the markup of this template was not readable in the reviewed
    copy (only the text content was); the tags below are a plain, unstyled
    reconstruction - confirm against the intended layout/CSS.
    """
    from html import escape

    if df is None or df.empty:
        return "<p>No assignments found.</p>"

    row_markup = []
    anything_pending = False
    for _, row in df.iterrows():
        url = row["Website URL"]
        assigned = row["Assigned Image Number"]
        work_done = row["Work Done on Image Number"]
        # Assigned images that do not appear among the completed ones.
        work_to_be_done = [img for img in assigned if img not in work_done]
        anything_pending = anything_pending or bool(work_to_be_done)

        # Escape every cell: the URL originates outside this module.
        cells = "".join(
            f"<td>{escape(str(value))}</td>"
            for value in (url, assigned, work_done, work_to_be_done)
        )
        row_markup.append(f"<tr>{cells}</tr>")

    rows = "\n".join(row_markup)
    total_work_to_be_done_text = (
        "" if anything_pending else "All assigned work is done! Great job!"
    )

    return f"""
<table>
<thead>
<tr>
<th>Website URL</th>
<th>Assigned Image Number</th>
<th>Work Done on Image Number</th>
<th>Work Still to be Done</th>
</tr>
</thead>
<tbody>
{rows}
</tbody>
</table>
<p>{total_work_to_be_done_text}</p>
"""
def display_user_assignment(db_path, user_state):
    """Build the assignment-status DataFrame for the logged-in user.

    Args:
        db_path: path of the SQLite database handed to ``sqlite3.connect``.
        user_state: mapping expected to carry a ``"username"`` key; a falsy
            value or a mapping without that key means "not logged in".

    Returns:
        A DataFrame with the columns ``"Website URL"``,
        ``"Assigned Image Number"`` and ``"Work Done on Image Number"`` (one
        row per assigned URL), or an empty DataFrame when the user is not
        logged in or has no assignments.

    Relies on a module-level ``user_assignment_manager`` that is not defined
    in this function.
    """
    if not (user_state and "username" in user_state):
        # User not logged in.
        return pd.DataFrame()

    username = user_state["username"]

    # The connection is only needed for this one lookup; close it right away
    # so handles do not accumulate across UI refreshes.
    # NOTE(review): assumes get_user_assessments_done returns a materialised
    # mapping (it is used with `in` and `[url]` below) - confirm.
    connection_db = sqlite3.connect(db_path)
    try:
        user_assessment_work = get_user_assessments_done(connection_db, username)
    finally:
        connection_db.close()
    print(f"User {username} has done assessments for {user_assessment_work} images.")

    print(f"Fetching assignment for user: {username}")
    assignments = user_assignment_manager.get_user_assignments(username, from_user_name=True)
    if assignments is None:
        # No assignments found for this user.
        return pd.DataFrame()
    print (f"Your current assignment: {assignments}")

    data_frame = [
        {
            "Website URL": url,
            "Assigned Image Number": assignments[url],
            "Work Done on Image Number": user_assessment_work[url] if url in user_assessment_work else [],
        }
        for url in assignments
    ]
    return pd.DataFrame(data_frame)
def process_dataframe(db_path, url, updated_df, user_state=None, llm_response_output=None):
    """Validate the user's ratings of the LLM proposals and persist them.

    Args:
        db_path: path of the SQLite database handed to ``sqlite3.connect``.
        url: page URL stored with the record.
        updated_df: the edited table as a DataFrame, a JSON string, or a list
            of dicts. It must contain the columns
            ``"User Assessment for LLM Proposal 1"`` and
            ``"User Assessment for LLM Proposal 2"`` with integers in 1..5.
        user_state: mapping with the ``"username"`` of the logged-in user.
        llm_response_output: the LLM response stored as the record input.

    Returns:
        ``(message, ok)``. ``ok`` is ``True`` only when the record was
        written; every validation or persistence problem yields an
        explanatory message and ``False``.
    """
    from io import StringIO

    print("Processing dataframe to adjust columns...type:", type(updated_df), updated_df)

    # Fresh objects per call instead of shared mutable defaults.
    user_state = user_state or {}
    llm_response_output = {} if llm_response_output is None else llm_response_output

    # accept different input forms from UI (DataFrame, JSON string, or list of dicts)
    try:
        if isinstance(updated_df, str):
            # Wrap the literal JSON: passing it directly to read_json is
            # deprecated in recent pandas releases.
            try:
                updated_df = pd.read_json(StringIO(updated_df), orient="records")
            except Exception:
                updated_df = pd.read_json(StringIO(updated_df))
        elif isinstance(updated_df, list):
            updated_df = pd.DataFrame(updated_df)
    except Exception as e:
        return f"Error parsing updated data: {str(e)}", False

    for column_rating_name in ["User Assessment for LLM Proposal 1", "User Assessment for LLM Proposal 2"]:
        try:
            updated_df[column_rating_name] = updated_df[column_rating_name].astype(int)
        except ValueError:
            return "Error: User Assessment for LLM Proposal must be an integer", False
        except KeyError:
            return "No data Saved because some images are not correcly managed. Please retry.", False
        except Exception as e:
            return f"Error processing User Assessment for LLM Proposal: {str(e)}", False

        ratings = updated_df[column_rating_name]
        if (ratings < 1).any() or (ratings > 5).any():
            return "Error: User Assessment for LLM Proposal must be between 1 and 5", False

    username = user_state.get("username")
    if not username:
        return "Error: user not logged in. No data saved.", False

    dataframe_json = updated_df.to_json(orient="records")
    json_user_str = json.dumps({"username": username}, ensure_ascii=False)
    # Taken from the LLM call; carries all the information about the images too.
    llm_response_output_str = json.dumps(llm_response_output, ensure_ascii=False)

    connection_db = None
    try:
        connection_db = sqlite3.connect(db_path)
        # insert after everything to keep datetime aligned
        db_persistence_insert(
            connection_db=connection_db,
            insert_type="wcag_user_llm_alttext_assessments",
            page_url=url,
            user=json_user_str,
            llm_model="",
            json_in_str=llm_response_output_str,
            json_out_str=dataframe_json,
            table="wcag_user_assessments",
        )
    except Exception as e:
        # Report the failure to the caller: the boolean drives the UI, so a
        # failed insert must not be announced as a successful save.
        print("Error inserting user assessment into database:", str(e))
        return f"Error saving user assessment: {str(e)}", False
    finally:
        if connection_db is not None:
            connection_db.close()

    print("User assessment saved to database successfully.returning:", True)
    return "User assessment saved successfully!", True
def load_images_from_json(json_input, user_assignment_current_status_df):
    """Extract image URLs and alt texts from the response and build an HTML gallery.

    Args:
        json_input: the image-extraction response, as a mapping with an
            ``"images"`` list (entries are read with the keys ``"url"``,
            ``"alt_text"`` and ``"page_url"``) or as a JSON string of it.
        user_assignment_current_status_df: DataFrame with the columns
            ``"Website URL"``, ``"Assigned Image Number"`` and
            ``"Work Done on Image Number"``; ``None`` or an empty value means
            no assignment information is shown.

    Returns:
        ``(info_text, html)``. On any problem ``html`` is ``""`` and
        ``info_text`` carries the message.

    NOTE(review): the markup of this template was not readable in the reviewed
    copy (only its text content was). The tags below are a plain, unstyled
    reconstruction, and the checkbox / assessment-form controls mentioned by
    the intro sentence are NOT reproduced because their contract with the
    rest of the UI is not visible here - confirm before relying on it.
    """
    from html import escape

    status_df = user_assignment_current_status_df
    user_assignments = {}
    # Only iterate a real, non-empty frame: calling .iterrows() on None (or
    # on a plain dict default) would raise before any image is processed.
    if status_df is None or getattr(status_df, "empty", True):
        print("No user assignment status found. Displaying all images without assignment info.")
    else:
        for _, row in status_df.iterrows():
            user_assignments[row["Website URL"]] = {
                "assigned": row["Assigned Image Number"],
                "work_done": row["Work Done on Image Number"],
            }

    try:
        if isinstance(json_input, (str, bytes, bytearray)):
            data = json.loads(json_input)
        else:
            data = json_input

        if "images" not in data or not data["images"]:
            return "No images found in JSON", ""

        images = data["images"]
        info_text = f"Found {len(images)} image(s)"

        cards = []
        for idx, img_data in enumerate(images):
            url = img_data.get("url", "")
            alt_text = img_data.get("alt_text", "No description")
            page_url = img_data.get("page_url", "")

            page_status = user_assignments.get(page_url, {})
            # Assignment lists are matched against the 1-based image position.
            position = idx + 1
            is_assigned = position in page_status.get("assigned", [])
            is_work_done = position in page_status.get("work_done", [])

            badge = ""
            if is_assigned:
                badge = f"<span>{'✓ Done' if is_work_done else '⚠ Assigned'}</span>"

            # Alt text and URLs come from the analysed web page: escape them
            # before placing them into markup.
            safe_alt = escape(str(alt_text))
            safe_url = escape(str(url), quote=True)
            cards.append(
                "<div>\n"
                f'<img src="{safe_url}" alt="{safe_alt}">\n'
                f"<p>Image {position}</p>\n"
                f"{badge}\n"
                f"<p>Current alt-text: {safe_alt}</p>\n"
                "</div>"
            )

        html = (
            "<p>Select the assigned images by clicking on the corresponding "
            "checkbox and start the evaluation.</p>\n"
            "<div>\n" + "\n".join(cards) + "\n</div>\n"
        )
        return info_text, html
    except json.JSONDecodeError as e:
        return f"Error: Invalid JSON format - {str(e)}", ""
    except Exception as e:
        return f"Error: {str(e)}", ""
def _llm_response_fields(img_data):
    """Return (assessment, proposed alt text) of one model entry, with UI placeholders."""
    mllm_response = img_data["mllm_response"]
    return (
        mllm_response.get("original_alt_text_assessment", "No description"),
        mllm_response.get("new_alt_text", "No description"),
    )


def load_llm_assessment_from_json(json_input):
    """Turn the alt-text validation response into a DataFrame, one row per image.

    Args:
        json_input: the response as a mapping (or a JSON string of it). The
            assessments are read from
            ``["mllm_validations"]["mllm_alttext_assessments"]``, which is
            either a list of per-image entries (one model) or a dict holding
            the lists ``"mllm_alttext_assessments_openai"`` and
            ``"mllm_alttext_assessments_local"`` (two models).

    Returns:
        * two models: a DataFrame with ``"Original Alt Text"``,
          ``"LLM Assessment 1"``, ``"LLM Proposed Alt Text 1"``,
          ``"LLM Assessment 2"``, ``"LLM Proposed Alt Text 2"`` (model 1 is
          the ``..._openai`` list, model 2 the ``..._local`` list);
        * one model: a DataFrame with ``"Original Alt Text"``,
          ``"LLM Assessment"``, ``"LLM Proposed Alt Text"``;
        * no ``"mllm_validations"``: an empty DataFrame;
        * on error: the tuple ``(message, [])`` - callers must check the type.
    """
    try:
        if isinstance(json_input, (str, bytes, bytearray)):
            data = json.loads(json_input)
        else:
            data = json_input

        if "mllm_validations" not in data or not data["mllm_validations"]:
            print("no mllm_validations found")
            return pd.DataFrame()

        assessments = data["mllm_validations"]["mllm_alttext_assessments"]

        # Inspect the container type first: calling .get() on a list-shaped
        # (single model) payload would raise and turn a valid response into
        # an error.
        openai_entries = local_entries = None
        if isinstance(assessments, dict):
            openai_entries = assessments.get("mllm_alttext_assessments_openai")
            local_entries = assessments.get("mllm_alttext_assessments_local")

        data_frame = []
        if openai_entries and local_entries:
            print(
                f"The response contains multiple models output. Assessment done by {len(assessments)} models on {len(openai_entries)} image(s)"
            )
            if len(local_entries) < len(openai_entries):
                raise ValueError(
                    "the local model returned fewer assessments than the openai model"
                )
            # Entries of the two models are paired by position.
            for img_data, img_data_local in zip(openai_entries, local_entries):
                assessment_1, proposal_1 = _llm_response_fields(img_data)
                assessment_2, proposal_2 = _llm_response_fields(img_data_local)
                data_frame.append(
                    {
                        "Original Alt Text": img_data.get("alt_text", "No alt_text provided"),
                        "LLM Assessment 1": assessment_1,
                        "LLM Proposed Alt Text 1": proposal_1,
                        "LLM Assessment 2": assessment_2,
                        "LLM Proposed Alt Text 2": proposal_2,
                    }
                )
        else:
            if isinstance(assessments, dict):
                # Only one of the two model lists is populated.
                single_entries = openai_entries or local_entries or []
            else:
                single_entries = assessments
            print(
                f"The response contains only one output. Assessment done on {len(single_entries)} image(s)"
            )
            for img_data in single_entries:
                assessment, proposal = _llm_response_fields(img_data)
                data_frame.append(
                    {
                        "Original Alt Text": img_data.get("alt_text", "No alt_text provided"),
                        "LLM Assessment": assessment,
                        "LLM Proposed Alt Text": proposal,
                    }
                )

        return pd.DataFrame(data_frame)
    except json.JSONDecodeError as e:
        return f"Error: Invalid JSON format - {str(e)}", []
    except Exception as e:
        return f"Error: {str(e)}", []
def make_alttext_llm_assessment_api_call(
    url,
    selected_images_json=None,
    db_path=None,
    wcag_rest_server_url="http://localhost:8000",
    user_state=None,
    number_of_images=30,
):
    """Request the LLM alt-text assessment for the selected images and store the user's input.

    Args:
        url: page URL being assessed.
        selected_images_json: JSON string (or already-decoded list) of the
            selected images; every entry is read with the keys
            ``"image_url"``, ``"original_alt_text"``, ``"assessment"``,
            ``"new_alt_text"`` and ``"image_index"`` (0-based).
        db_path: path of the SQLite database handed to ``sqlite3.connect``.
        wcag_rest_server_url: base URL of the REST server.
        user_state: mapping with the ``"username"`` of the logged-in user.
        number_of_images: forwarded to the REST endpoint.

    Returns:
        A 4-tuple ``(status_message, dataframe, raw_response, ok)``. On every
        failure path the dataframe is empty, the response is ``{}`` and
        ``ok`` is ``False``, so all outcomes share one shape.
    """

    def failed(message):
        # Same shape as the success return, flagged as not usable.
        return message, pd.DataFrame(), {}, False

    print(
        f"Making API call for llm assessment for {url} to {wcag_rest_server_url}/wcag_alttext_validation"
    )

    try:
        if isinstance(selected_images_json, (str, bytes, bytearray)):
            selected_images = json.loads(selected_images_json) if selected_images_json else []
        else:
            selected_images = list(selected_images_json or [])
    except (TypeError, ValueError) as e:
        return failed(f"Error: invalid image selection - {str(e)}")

    if not selected_images:
        print("LLM assessment not started because no valid images were selected.")
        return failed("LLM assessment not started")

    username = (user_state or {}).get("username")
    if not username:
        return failed("Error: user not logged in")

    try:
        selected_urls = [img["image_url"] for img in selected_images]
        selected_alt_text_original = [img["original_alt_text"] for img in selected_images]
        user_assessments = [img["assessment"] for img in selected_images]
        user_new_alt_texts = [img["new_alt_text"] for img in selected_images]
        # +1 aligns the 0-based UI index with the 1-based image numbering.
        selected_image_id = [int(img["image_index"]) + 1 for img in selected_images]
        # Ratings of the LLM proposals start at 0 (not yet rated).
        user_assessments_llm_proposal_1 = [0] * len(selected_images)
        user_assessments_llm_proposal_2 = [0] * len(selected_images)

        response = call_API_urlibrequest(
            data={
                "page_url": url,
                "number_of_images": number_of_images,
                "context_levels": 5,
                "pixel_distance_threshold": 200,
                "save_images": "True",
                "save_elaboration": "True",
                "specific_images_urls": selected_urls,
            },
            url=wcag_rest_server_url + "/wcag_alttext_validation",
            headers=WCAG_VALIDATOR_RESTSERVER_HEADERS,
        )

        info_dataframe = load_llm_assessment_from_json(response)
        if not isinstance(info_dataframe, pd.DataFrame):
            # The loader signals problems with a (message, []) tuple.
            if isinstance(info_dataframe, tuple) and info_dataframe:
                return failed(str(info_dataframe[0]))
            return failed("Error: unexpected LLM assessment payload")

        # Add the UI ids and the user's own input to the API response table.
        info_dataframe.insert(0, "Image #", selected_image_id)
        info_dataframe.insert(1, "Image url", selected_urls)
        info_dataframe.insert(3, "User Assessment", user_assessments)
        info_dataframe.insert(4, "User Proposed Alt Text", user_new_alt_texts)
        info_dataframe["User Assessment for LLM Proposal 1"] = (
            user_assessments_llm_proposal_1
        )
        info_dataframe["User Assessment for LLM Proposal 2"] = (
            user_assessments_llm_proposal_2
        )
    except Exception as e:
        return failed(f"Error: {str(e)}")

    json_in_str = json.dumps(
        {
            "images_urls": selected_urls,
            "images_alt_text_original": selected_alt_text_original,
        },
        ensure_ascii=False,
    )
    json_out_str = json.dumps(
        {
            "user_assessments": user_assessments,
            "user_new_alt_texts": user_new_alt_texts,
        },
        ensure_ascii=False,
    )
    json_user_str = json.dumps({"username": username}, ensure_ascii=False)

    # Persisting the user's input is best effort: a storage problem is logged
    # but does not discard the assessment that was just computed. The
    # connection is opened only here so no earlier return can leak it.
    connection_db = None
    try:
        connection_db = sqlite3.connect(db_path)
        # insert after everything to keep datetime aligned
        db_persistence_insert(
            connection_db=connection_db,
            insert_type="wcag_user_alttext_assessments",
            page_url=url,
            user=json_user_str,
            llm_model="",
            json_in_str=json_in_str,
            json_out_str=json_out_str,
            table="wcag_user_assessments",
        )
    except Exception as e:
        print("Error inserting user assessment into database:", str(e))
    finally:
        if connection_db is not None:
            connection_db.close()

    return "LLM assessment completed", info_dataframe, response, True
def make_image_extraction_api_call(
    url,
    number_of_images=30,
    wcag_rest_server_url="http://localhost:8000",
    user_assignment_current_status=None,
):
    """Request the images of a page and render them as an HTML gallery.

    Args:
        url: page URL whose images are extracted.
        number_of_images: forwarded to the REST endpoint.
        wcag_rest_server_url: base URL of the REST server.
        user_assignment_current_status: DataFrame with the user's assignment
            status, forwarded to ``load_images_from_json``; any other value
            is treated as "no assignment information".

    Returns:
        ``(info_text, gallery_html)``. On failure ``gallery_html`` is ``""``
        and ``info_text`` carries the error message, matching the shape that
        ``load_images_from_json`` uses for its own errors.
    """
    print(
        f"Making API call for image_extraction for {url} to {wcag_rest_server_url}/extract_images"
    )

    # The gallery builder reads DataFrame attributes; normalise everything
    # else (None, a plain dict, ...) to an empty frame.
    if not isinstance(user_assignment_current_status, pd.DataFrame):
        user_assignment_current_status = pd.DataFrame()

    try:
        response = call_API_urlibrequest(
            data={
                "page_url": url,
                "number_of_images": number_of_images,
            },
            url=wcag_rest_server_url + "/extract_images",
            headers=WCAG_VALIDATOR_RESTSERVER_HEADERS,
        )
        info_text, gallery_images = load_images_from_json(
            response, user_assignment_current_status
        )
        return info_text, gallery_images
    except Exception as e:
        # Keep the two-value shape so both UI outputs always receive a value.
        return f"Error: {str(e)}", ""
def render_alttext_form(df):
"""Render a pandas DataFrame (or list/dict) into an editable HTML form."""
try:
if df is None or df.empty:
print("No data to render in form.")
html=""
return gr.update(value=html), html # return empty form
if isinstance(df, str):
df = pd.read_json(df, orient="records")
if isinstance(df, dict):
df = pd.DataFrame(df)
if isinstance(df, list):
df = pd.DataFrame(df)
html = """
Image #
Original Alt Text
User Assessment
User Proposed Alt Text
LLM Assessment 1
LLM Proposed Alt Text 1
User Assessment for LLM Proposal 1
LLM Assessment 2
LLM Proposed Alt Text 2
User Assessment for LLM Proposal 2
"""
for _, row in df.iterrows():
imgnum = row.get("Image #", "")
imgurl = row.get("Image url", "")
orig = row.get("Original Alt Text", "")
user_ass = row.get("User Assessment", "")
user_prop = row.get("User Proposed Alt Text", "")
llm1_ass = row.get("LLM Assessment 1", "")
llm2_ass = row.get("LLM Assessment 2", "")
llm1_prop = row.get("LLM Proposed Alt Text 1", "")
llm2_prop = row.get("LLM Proposed Alt Text 2", "")
user_llm1_ass = row.get("User Assessment for LLM Proposal 1", 0)
user_llm2_ass = row.get("User Assessment for LLM Proposal 2", 0)
html += f"""