diff --git a/.env b/.env index c1bb668..93614b6 100644 --- a/.env +++ b/.env @@ -1,10 +1,10 @@ -mllm_end_point_openai='https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2025-01-01-preview' -mllm_api_key_openai= -mllm_model_id_openai='gpt-4o' +MLLM_END_POINT_OPENAI=https://hiis-accessibility-fonderia.cognitiveservices.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2025-01-01-preview +MLLM_API_KEY_OPENAI= +MLLM_MODEL_ID_OPENAI=gpt-4o -mllm_end_point_local='https://vgpu.hiis.cloud.isti.cnr.it/api/chat' -mllm_api_key_local= -#mllm_model_id_local='gemma3:12b' -mllm_model_id_local='gemma3:4b' +MLLM_END_POINT_LOCAL=https://vgpu.hiis.cloud.isti.cnr.it/api/chat +MLLM_API_KEY_LOCAL= +#MLLM_MODEL_ID_LOCAL=gemma3:12b +MLLM_MODEL_ID_LOCAL=gemma3:4b -use_openai_model='False' \ No newline at end of file +USE_OPENAI_MODEL=True \ No newline at end of file diff --git a/README.md b/README.md index 99319a7..8d0d842 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,15 @@ python wcag_validator.py python wcag_validator_RESTserver.py ## For UI use: -python ui_alt_text.py +python wcag_validator_ui.py + +## Docker +### Rest server +docker build -t wcag_resr_server . +docker run --env-file .env -p 8000:8000 --name wcag_rest_server -d wcag_rest_server +### UI +docker build -t wcag_ui . +docker run --env-file UI/.env -p 8001:8001 --name wcag_ui -d wcag_ui + ## The scripts folder contains some elaboration scripts. They require a dedicated requirements file \ No newline at end of file diff --git a/UI/.env b/UI/.env new file mode 100644 index 0000000..1b0e7f3 --- /dev/null +++ b/UI/.env @@ -0,0 +1,4 @@ +DB_PATH=persistence/wcag_validator_ui.db +WCAG_REST_SERVER_URL=http://localhost:8000 +URL_LIST_old=["http://www.amazon.it","https://web.archive.org/web/20230630235957/http://www.amazon.com/", "https://web.archive.org/web/20251130033532/https://www.ebay.com/"] +URL_LIST=["https://amazon.com","https://ebay.com","https://walmart.com","https://etsy.com","https://target.com","https://wayfair.com","https://bestbuy.com","https://macys.com","https://homedepot.com","https://costco.com","https://www.ansa.it","https://en.wikipedia.org/wiki/Main_Page","https://www.lanazione.it","https://www.ansa.it","https://www.bbc.com","https://www.cnn.com","https://www.nytimes.com","https://www.theguardian.com"] \ No newline at end of file diff --git a/UI/dependences_ui/__pycache__/utils.cpython-310.pyc b/UI/dependences_ui/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000..30e3cb8 Binary files /dev/null and b/UI/dependences_ui/__pycache__/utils.cpython-310.pyc differ diff --git a/UI/dependences_ui/utils.py b/UI/dependences_ui/utils.py new file mode 100644 index 0000000..dd3bf70 --- /dev/null +++ b/UI/dependences_ui/utils.py @@ -0,0 +1,117 @@ +import hashlib +import json +import os +import gradio as gr + +# File to store user credentials +USERS_FILE = "users.json" + + +def load_users(): + """Load users from JSON file""" + if os.path.exists(USERS_FILE): + with open(USERS_FILE, "r") as f: + return json.load(f) + return {} + + +def save_users(users): + """Save users to JSON file""" + with open(USERS_FILE, "w") as f: + json.dump(users, f) + + +def hash_password(password): + """Hash password using SHA-256""" + return hashlib.sha256(password.encode()).hexdigest() + + +def register_user(username, password, confirm_password): + """Register a new user""" + if not username or not password: + return "", "Username and password cannot be empty!", None + + if password != confirm_password: + return "", "Passwords do not match!", None + + if len(password) < 6: + return "", "Password must be at least 6 characters long!", None + + users = load_users() + + if username in users: + return "", "Username already exists!", None + + users[username] = hash_password(password) + save_users(users) + + return "", f"✅ Registration successful! You can now login.", None + + +def login_user(username, password, state): + """Validate user login""" + if not username or not password: + return ( + "Please enter both username and password!", + "", + state, + gr.update(visible=True), + gr.update(visible=False), + gr.update(visible=False), + gr.update(open=True), + ) + + users = load_users() + + if username not in users: + return ( + "Invalid username or password!", + "", + state, + gr.update(visible=True), + gr.update(visible=False), + gr.update(visible=False), + gr.update(open=True), + ) + + if users[username] != hash_password(password): + return ( + "Invalid username or password!", + "", + state, + gr.update(visible=True), + gr.update(visible=False), + gr.update(visible=False), + gr.update(open=True), + ) + + # Login successful + state = {"logged_in": True, "username": username} + return ( + f"✅ Welcome back, {username}!", + "", + state, + gr.update(visible=False), + gr.update(visible=True), + gr.update(visible=True), + gr.update(open=False), + ) + + +def logout_user(state): + """Logout current user""" + state = {"logged_in": False, "username": None} + return ( + "Logged out successfully!", + state, + gr.update(visible=True), + gr.update(visible=False), + gr.update(visible=False), + ) + + +def protected_content(state): + """Content only accessible to logged-in users""" + if state.get("logged_in"): + return f"You are logged as {state.get('username')}\n" + return "Please login to access this content." diff --git a/UI/requirements_UI.txt b/UI/requirements_UI.txt new file mode 100644 index 0000000..8bb7be9 --- /dev/null +++ b/UI/requirements_UI.txt @@ -0,0 +1,4 @@ +gradio==5.49.1 +pandas==2.3.3 +python-dotenv==1.2.1 +requests==2.32.5 \ No newline at end of file diff --git a/UI/requirements_extra.txt b/UI/requirements_extra.txt deleted file mode 100644 index 4bf8a16..0000000 --- a/UI/requirements_extra.txt +++ /dev/null @@ -1 +0,0 @@ -gradio==5.49.1 \ No newline at end of file diff --git a/UI/ui_alt_text.py b/UI/wcag_validator_ui.py similarity index 59% rename from UI/ui_alt_text.py rename to UI/wcag_validator_ui.py index fe9ad6e..8cd3f46 100644 --- a/UI/ui_alt_text.py +++ b/UI/wcag_validator_ui.py @@ -1,11 +1,24 @@ #### To launch the script -# gradio ui_alt_text.py -# python ui_alt_text.py +# gradio wcag_validator_ui.py +# python wcag_validator_ui.py import gradio as gr import requests +from pathlib import Path +import sys +import pandas as pd -# from ..dependences.utils import call_API_urlibrequest +parent_dir = Path(__file__).parent.parent +sys.path.insert(0, str(parent_dir)) +from dotenv import load_dotenv, find_dotenv +from dependences.utils import ( + call_API_urlibrequest, + create_folder, + db_persistence_startup, + db_persistence_insert, + return_from_env_valid, +) +from dependences_ui.utils import * import logging import time import json @@ -17,196 +30,6 @@ import sqlite3 WCAG_VALIDATOR_RESTSERVER_HEADERS = [("Content-Type", "application/json")] -url_list = [ - "https://amazon.com", - "https://web.archive.org/web/20251126051721/https://www.amazon.com/", - "https://web.archive.org/web/20230630235957/http://www.amazon.com/", - "https://ebay.com", - "https://walmart.com", - "https://etsy.com", - "https://target.com", - "https://wayfair.com", - "https://bestbuy.com", - "https://macys.com", - "https://homedepot.com", - "https://costco.com", - "https://www.ansa.it", - "https://en.wikipedia.org/wiki/Main_Page", - "https://www.lanazione.it", - "https://www.ansa.it", - "https://www.bbc.com", - "https://www.cnn.com", - "https://www.nytimes.com", - "https://www.theguardian.com", -] - -# ------ TODO use from utils instead of redefining here - - -def call_API_urlibrequest( - data={}, - verbose=False, - url="", - headers=[], - method="post", - base=2, # number of seconds to wait - max_tries=3, -): - - if verbose: - logging.info("input_data:%s", data) - - # Allow multiple attempts to call the API incase of downtime. - # Return provided response to user after 3 failed attempts. - wait_seconds = [base**i for i in range(max_tries)] - - for num_tries in range(max_tries): - try: - - if method == "get": - - # Encode the parameters and append them to the URL - query_string = urllib.parse.urlencode(data) - - url_with_params = f"{url}?{query_string}" - request = urllib.request.Request(url_with_params, method="GET") - for ele in headers: - - request.add_header(ele[0], ele[1]) - - elif method == "post": - # Convert the dictionary to a JSON formatted string and encode it to bytes - data_to_send = json.dumps(data).encode("utf-8") - - request = urllib.request.Request(url, data=data_to_send, method="POST") - for ele in headers: - - request.add_header(ele[0], ele[1]) - else: - return {"error_message": "method_not_allowed"} - - # Send the request and capture the response - - with urllib.request.urlopen(request) as response: - # Read and decode the response - - response_json = json.loads(response.read().decode("utf-8")) - logging.info("response_json:%s", response_json) - - logging.info("response.status_code:%s", response.getcode()) - return response_json - - except Exception as e: - - logging.error("error message:%s", e) - response_json = {"error": e} - - logging.info("num_tries:%s", num_tries) - logging.info( - "Waiting %s seconds before automatically trying again.", - str(wait_seconds[num_tries]), - ) - time.sleep(wait_seconds[num_tries]) - - logging.info( - "Tried %s times to make API call to get a valid response object", max_tries - ) - logging.info("Returning provided response") - return response_json - - -def create_folder(root_path, directory_separator, next_path): - output_dir = root_path + directory_separator + next_path - try: - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - except Exception as e: - logging.error(exception_msg, e) - - exit(1) - return output_dir - - -def db_persistence_startup( - db_name_and_path="persistence/wcag_validator.db", - table="wcag_validator_results", -): - - try: - - _ = create_folder( - root_path=os.getcwd(), - directory_separator="/", - next_path="persistence", - ) - - except Exception as e: - logging.error("exception on db persistence startup:%s", e) - - exit(1) - try: - db_connection = sqlite3.connect(db_name_and_path) - cursor = db_connection.cursor() - # Create a table to store JSON data - cursor.execute( - """CREATE TABLE IF NOT EXISTS """ - + table - + """ ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - insertion_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - insert_type TEXT, - json_input_data TEXT, json_output_data TEXT - )""" - ) - - db_connection.commit() - logging.info("connection to the database established") - return db_connection - - except Exception as e: - - logging.error("db_management problem:%s", e) - exit(1) - - -def db_persistence_insert( - connection_db, - insert_type, - json_in_str, - json_out_str, - table="wcag_validator_results", -): - - try: - cursor = connection_db.cursor() - - # Insert JSON data into the table along with the current timestamp - cursor.execute( - "INSERT INTO " - + table - + " (insert_type,json_input_data,json_output_data) VALUES (?,?,?)", - (insert_type, json_in_str, json_out_str), - ) - connection_db.commit() - logging.info( - "Data correctly saved on local db table:%s, insertion type:%s", - table, - insert_type, - ) - except Exception as e: - logging.error("exception" + " %s", e) - - -# ------- End TODO use from utils instead of redefining here - - -# Method 1: Embed external website (works only for sites that allow iframes) -def create_iframe(url): - iframe_html = ( - f'' - ) - return iframe_html def load_images_from_json(json_input): @@ -361,7 +184,7 @@ def load_images_from_json(json_input): """ - info_text += f"✓ Image {idx+1} alt_text: {alt_text}\n" + # info_text += f"✓ Image {idx+1} alt_text: {alt_text}\n" html += "" return info_text, html @@ -380,13 +203,14 @@ def load_llm_assessment_from_json(json_input): if "mllm_validations" not in data or not data["mllm_validations"]: print("no mllm_validations found") - return "No mllm_validations found in JSON", [] + return pd.DataFrame() info_text = f"Assessment done on {len(data['mllm_validations']['mllm_alttext_assessments'])} image(s)\n\n" print( f"Assessment done on {len(data['mllm_validations']['mllm_alttext_assessments'])} image(s)" ) + data_frame = [] for idx, img_data in enumerate( data["mllm_validations"]["mllm_alttext_assessments"], 1 ): @@ -399,9 +223,17 @@ def load_llm_assessment_from_json(json_input): ) alt_text_original = img_data.get("alt_text", "No alt_text provided") - info_text += f"✓ alt_text original: {alt_text_original}. LLM assessment: {original_alt_text_assessment} => LLM proposed alt_text: {new_alt_text}\n" + data_frame.append( + { + + "Original Alt Text": alt_text_original, + "LLM Assessment": original_alt_text_assessment, + "Proposed Alt Text": new_alt_text, + } + ) - return info_text + df = pd.DataFrame(data_frame) + return df except json.JSONDecodeError as e: return f"Error: Invalid JSON format - {str(e)}", [] @@ -410,19 +242,51 @@ def load_llm_assessment_from_json(json_input): def make_alttext_llm_assessment_api_call( - url, selected_images_json=[], number_of_images=30 + url, + selected_images_json=[], + db_path=None, + wcga_rest_server_url="http://localhost:8000", + user_state={}, + number_of_images=30, ): - print(f"Making API call to {url}") + + print( + f"Making API call for llm assessment for {url} to {wcga_rest_server_url}/wcag_alttext_validation" + ) selected_images = json.loads(selected_images_json) if selected_images_json else [] - print("selected_images:", selected_images) + # print("selected_images:", selected_images) if not selected_images or len(selected_images) == 0: info_text = "No images selected" - return info_text + print(info_text) + return pd.DataFrame() + # prepare data for insertion + json_in_str = {} + json_out_str = {} selected_urls = [] + selected_alt_text_original = [] + user_assessments = [] + user_new_alt_texts = [] + selected_image_id = [] for img in selected_images: selected_urls.append(img["image_url"]) + selected_alt_text_original.append(img["original_alt_text"]) + user_assessments.append(img["assessment"]) + user_new_alt_texts.append(img["new_alt_text"]) + selected_image_id.append( + int(img["image_index"]) + 1 + ) # add the id selected (+1 for index alignment) + json_in_str["images_urls"] = selected_urls + json_in_str["images_alt_text_original"] = selected_alt_text_original + json_out_str["user_assessments"] = user_assessments + json_out_str["user_new_alt_texts"] = user_new_alt_texts + json_in_str = json.dumps(json_in_str, ensure_ascii=False) + json_out_str = json.dumps(json_out_str, ensure_ascii=False) + json_user_str = json.dumps({"username": user_state["username"]}, ensure_ascii=False) + connection_db = sqlite3.connect(db_path) + # --------- + try: response = call_API_urlibrequest( @@ -435,19 +299,46 @@ def make_alttext_llm_assessment_api_call( "save_elaboration": "True", "specific_images_urls": selected_urls, }, - url="http://localhost:8000/wcag_alttext_validation", + url=wcga_rest_server_url + "/wcag_alttext_validation", headers=WCAG_VALIDATOR_RESTSERVER_HEADERS, ) # return response - info_text = load_llm_assessment_from_json(response) + info_dataframe = load_llm_assessment_from_json(response) + info_dataframe.insert( + 0, 'Image #', selected_image_id + ) # add the UI ids from to api response - return info_text except Exception as e: return {"error": str(e)} + try: + # insert after everything to keep datetime aligned + db_persistence_insert( + connection_db=connection_db, + insert_type="wcag_user_alttext_assessments", + page_url=url, + user=json_user_str, + llm_model="", + json_in_str=json_in_str, + json_out_str=json_out_str, + table="wcag_user_assessments", + ) + except Exception as e: + print("Error inserting user assessment into database:", str(e)) + finally: + if connection_db: + connection_db.close() + return info_dataframe -def make_image_extraction_api_call(url, number_of_images=30): - print(f"Making API call to {url}") + +def make_image_extraction_api_call( + url, + number_of_images=30, + wcga_rest_server_url="http://localhost:8000", +): + print( + f"Making API call for image_extraction for {url} to {wcga_rest_server_url}/extract_images" + ) try: response = call_API_urlibrequest( @@ -455,7 +346,7 @@ def make_image_extraction_api_call(url, number_of_images=30): "page_url": url, "number_of_images": number_of_images, }, - url="http://localhost:8000/extract_images", + url=wcga_rest_server_url + "/extract_images", headers=WCAG_VALIDATOR_RESTSERVER_HEADERS, ) # return response @@ -468,18 +359,83 @@ def make_image_extraction_api_call(url, number_of_images=30): # ------- Gradio Interface -------# -# Global variable to hold database connection -connection_db = db_persistence_startup(table="wcag_user_assessments") # Create Gradio interface -with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo: +with gr.Blocks(theme=gr.themes.Glass(), title="WCAG AI Validator") as demo: - # Use the global connection_db reference - print("Database connection reference available globally") + env_path = find_dotenv(filename=".env") + if env_path == "": + print("env path not found: service starting with the default params values") + _ = load_dotenv(env_path) # read .env file + db_path = return_from_env_valid("DB_PATH", "persistence/wcag_validator_ui.db") + print("db_path:", db_path) + wcga_rest_server_url = return_from_env_valid( + "WCGA_REST_SERVER_URL", "http://localhost:8000" + ) + + default_urls = [ + "https://amazon.com", + "https://ebay.com", + ] + url_list_str=return_from_env_valid("URL_LIST",json.dumps(default_urls)) + url_list = json.loads(url_list_str) + + print("wcga_rest_server_url:", wcga_rest_server_url) + + connection_db = db_persistence_startup( + db_name_and_path=db_path, table="wcag_user_assessments" + ) + print("Database connection reference available:", connection_db) + connection_db.close() gr.Markdown("# WCAG AI Validator UI") - with gr.Tab("Alt Text Assessment"): + # login section + user_state = gr.State({"logged_in": False, "username": None}) + with gr.Accordion(label="Register & Login", open=True) as register_and_login: + with gr.Column(visible=True) as login_section: + gr.Markdown("## Login / Register") + + with gr.Tab("Login"): + login_username = gr.Textbox( + label="Username", placeholder="Enter your username" + ) + login_password = gr.Textbox( + label="Password", type="password", placeholder="Enter your password" + ) + login_btn = gr.Button("Login", variant="primary") + login_msg = gr.Textbox(label="Login Status", interactive=False) + + with gr.Tab("Register"): + reg_username = gr.Textbox( + label="Username", placeholder="Choose a username" + ) + reg_password = gr.Textbox( + label="Password", + type="password", + placeholder="Choose a password (min 6 characters)", + ) + reg_confirm = gr.Textbox( + label="Confirm Password", + type="password", + placeholder="Confirm your password", + ) + reg_btn = gr.Button("Register", variant="primary") + reg_msg = gr.Textbox(label="Registration Status", interactive=False) + + with gr.Column(visible=False) as protected_section: + + content_display = gr.Textbox( + label="Your account", lines=5, interactive=False + ) + logout_btn = gr.Button("Logout", variant="stop") + + # end login section + + with gr.Tab("Alt Text Assessment", visible=False) as alttext_assessment: + + db_path_state = gr.State(value=db_path) # Store path in State + wcga_rest_server_url_state = gr.State(value=wcga_rest_server_url) with gr.Row(): with gr.Column(): @@ -492,10 +448,17 @@ with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo: label="Select an URL", info="Select an URL to load in iframe", ) + images_number = gr.Slider( + 5, + 100, + value=30, + step=5, + label="Max number of images to retrieve", + ) with gr.Column(): image_extraction_api_call_btn = gr.Button( - "Extract Images & Alt Text", variant="primary" + "Extract Images & Alt Texts", variant="primary" ) alttext_api_call_btn = gr.Button( "Alt Text LLM Assessment", @@ -505,15 +468,27 @@ with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo: with gr.Row(): - image_info_output = gr.Textbox(label="Original alt-text", lines=5) - alttext_info_output = gr.Textbox(label="LLM Assessment", lines=5) + image_info_output = gr.Textbox(label="Managed Images", lines=5) + + # Use DataFrame for tabular output + alttext_info_output = gr.DataFrame( + headers=[ + "Image #", + "Original Alt Text", + "LLM Assessment", + "Proposed Alt Text", + ], + label="LLM Assessment Results", + wrap=True, # Wrap text in cells + interactive=False, + ) with gr.Row(): gallery_html = gr.HTML(label="Image Gallery") image_extraction_api_call_btn.click( - fn=lambda: ("", "", "", gr.Button(interactive=False)), + fn=lambda: ("", "", pd.DataFrame(), gr.Button(interactive=False)), inputs=[], outputs=[ image_info_output, @@ -523,7 +498,7 @@ with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo: ], ).then( make_image_extraction_api_call, - inputs=[url_input], + inputs=[url_input, images_number, wcga_rest_server_url_state], outputs=[image_info_output, gallery_html], ).then( fn=lambda: gr.Button(interactive=True), @@ -535,7 +510,13 @@ with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo: alttext_api_call_btn.click( fn=make_alttext_llm_assessment_api_call, - inputs=[url_input, gallery_html], + inputs=[ + url_input, + gallery_html, + db_path_state, + wcga_rest_server_url_state, + user_state, + ], outputs=[alttext_info_output], js=""" (url_input,gallery_html) => { @@ -558,6 +539,7 @@ with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo: const newAltText = document.querySelector('.new-alt-text[data-index="' + index + '"]').value; selectedData.push({ + image_index: index, image_url: imageUrl, original_alt_text: originalAlt, assessment: parseInt(assessment), @@ -570,7 +552,40 @@ with gr.Blocks(theme="Insuz/SimpleIndigo", title="WCAG AI Validator") as demo: """, ) + # placed here at the end to give full contents visibility to events + # Event handlers + login_btn.click( + fn=login_user, + inputs=[login_username, login_password, user_state], + outputs=[ + login_msg, + reg_msg, + user_state, + login_section, + protected_section, + alttext_assessment, + register_and_login, + ], + ).then(fn=protected_content, inputs=[user_state], outputs=[content_display]) + + reg_btn.click( + fn=register_user, + inputs=[reg_username, reg_password, reg_confirm], + outputs=[login_msg, reg_msg, user_state], + ) + + logout_btn.click( + fn=logout_user, + inputs=[user_state], + outputs=[ + login_msg, + user_state, + login_section, + protected_section, + alttext_assessment, + ], + ) + if __name__ == "__main__": - # connection_db = db_persistence_startup(table="wcag_user_assessments") - demo.launch() + demo.launch(server_name="0.0.0.0", server_port=7860) diff --git a/dependences/__pycache__/image_extractor.cpython-310.pyc b/dependences/__pycache__/image_extractor.cpython-310.pyc deleted file mode 100644 index b22845b..0000000 Binary files a/dependences/__pycache__/image_extractor.cpython-310.pyc and /dev/null differ diff --git a/dependences/__pycache__/mllm_management.cpython-310.pyc b/dependences/__pycache__/mllm_management.cpython-310.pyc deleted file mode 100644 index c70abca..0000000 Binary files a/dependences/__pycache__/mllm_management.cpython-310.pyc and /dev/null differ diff --git a/dependences/__pycache__/utils.cpython-310.pyc b/dependences/__pycache__/utils.cpython-310.pyc deleted file mode 100644 index e34aab6..0000000 Binary files a/dependences/__pycache__/utils.cpython-310.pyc and /dev/null differ diff --git a/dependences/image_extractor.py b/dependences/image_extractor.py index 600dfad..357e41f 100644 --- a/dependences/image_extractor.py +++ b/dependences/image_extractor.py @@ -347,7 +347,7 @@ class ImageExtractor: #await page.goto(self.url, wait_until="networkidle") # method 1: use if the page has unpredictable async content and there is the need to ensure everything loads # The "networkidle" approach is generally more robust but slower, while the fixed timeout is faster but less adaptive to actual page behavior. # ---alternative method2: use if there is total awareness of the page's loading pattern and want faster, more reliable execution - await page.goto(self.url, wait_until="load") + await page.goto(self.url, timeout=50000, wait_until="load")# deafult timeout=30000, 30sec # Wait for page to load completely await page.wait_for_timeout(2000) # Wait for dynamic content # ----- @@ -380,7 +380,7 @@ class ImageExtractor: try: img_element = await page.locator( f'img[src="{url}"]' - ).first.element_handle() # Use first() to get only the first match + ).first.element_handle(timeout=0) # Use first() to get only the first match; 0 timeout=No timeout if img_element: img_elements.append(img_element) except Exception as e: diff --git a/dependences/utils.py b/dependences/utils.py index a8a9232..3253ece 100644 --- a/dependences/utils.py +++ b/dependences/utils.py @@ -160,6 +160,9 @@ def db_persistence_startup( id INTEGER PRIMARY KEY AUTOINCREMENT, insertion_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, insert_type TEXT, + page_url TEXT, + user TEXT, + llm_model TEXT, json_input_data TEXT, json_output_data TEXT )""" ) @@ -177,8 +180,11 @@ def db_persistence_startup( def db_persistence_insert( connection_db, insert_type, - json_in_str, - json_out_str, + page_url, + user="", + llm_model="", + json_in_str="", + json_out_str="", table="wcag_validator_results", ): @@ -189,8 +195,8 @@ def db_persistence_insert( cursor.execute( "INSERT INTO " + table - + " (insert_type,json_input_data,json_output_data) VALUES (?,?,?)", - (insert_type, json_in_str, json_out_str), + + " (insert_type,page_url,user,llm_model,json_input_data,json_output_data) VALUES (?,?,?,?,?,?)", + (insert_type, page_url, user, llm_model, json_in_str, json_out_str), ) connection_db.commit() logging.info( diff --git a/docker/UI/Dockerfile b/docker/UI/Dockerfile new file mode 100644 index 0000000..3b64cc2 --- /dev/null +++ b/docker/UI/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.10-slim + +COPY /docker/UI/requirements_UI.txt /tmp/requirements_UI.txt + +RUN pip install --no-cache-dir -r /tmp/requirements_UI.txt + +RUN rm /tmp/requirements_UI.txt + +COPY dependences /dependences + +COPY /UI/persistence /UI/persistence +COPY /UI/dependences_ui /UI/dependences_ui +COPY /UI/wcag_validator_ui.py /UI/wcag_validator_ui.py + +EXPOSE 7860 + +WORKDIR /UI +CMD ["python","wcag_validator_ui.py"] + + + diff --git a/docker/UI/requirements_UI.txt b/docker/UI/requirements_UI.txt new file mode 100644 index 0000000..8bb7be9 --- /dev/null +++ b/docker/UI/requirements_UI.txt @@ -0,0 +1,4 @@ +gradio==5.49.1 +pandas==2.3.3 +python-dotenv==1.2.1 +requests==2.32.5 \ No newline at end of file diff --git a/docker/restServer/Dockerfile b/docker/restServer/Dockerfile new file mode 100644 index 0000000..110aa6e --- /dev/null +++ b/docker/restServer/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.10-slim + +COPY /docker/restServer/requirements.txt /tmp/requirements.txt + +RUN pip install --no-cache-dir -r /tmp/requirements.txt + +# Install Playwright browsers and dependencies +RUN playwright install --with-deps + +RUN rm /tmp/requirements.txt + +COPY persistence /persistence + +COPY dependences /dependences + +COPY restserver /restserver +COPY wcag_validator_RESTserver.py wcag_validator_RESTserver.py + +EXPOSE 8000 +CMD ["python","wcag_validator_RESTserver.py"] + + + diff --git a/docker/restServer/requirements.txt b/docker/restServer/requirements.txt new file mode 100644 index 0000000..2aebf1b --- /dev/null +++ b/docker/restServer/requirements.txt @@ -0,0 +1,6 @@ +pandas==2.3.3 +playwright==1.56.0 +python-dotenv==1.2.1 +requests==2.32.5 +uvicorn==0.38.0 +fastapi==0.121.2 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 55375ef..2aebf1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ pandas==2.3.3 playwright==1.56.0 python-dotenv==1.2.1 requests==2.32.5 -uvicorn==0.38.0 \ No newline at end of file +uvicorn==0.38.0 +fastapi==0.121.2 \ No newline at end of file diff --git a/restserver/routers/__pycache__/routes_health.cpython-310.pyc b/restserver/routers/__pycache__/routes_health.cpython-310.pyc deleted file mode 100644 index 020415c..0000000 Binary files a/restserver/routers/__pycache__/routes_health.cpython-310.pyc and /dev/null differ diff --git a/restserver/routers/__pycache__/routes_local_db.cpython-310.pyc b/restserver/routers/__pycache__/routes_local_db.cpython-310.pyc deleted file mode 100644 index 3c2df64..0000000 Binary files a/restserver/routers/__pycache__/routes_local_db.cpython-310.pyc and /dev/null differ diff --git a/restserver/routers/__pycache__/routes_wcag_alttext.cpython-310.pyc b/restserver/routers/__pycache__/routes_wcag_alttext.cpython-310.pyc deleted file mode 100644 index 19944fe..0000000 Binary files a/restserver/routers/__pycache__/routes_wcag_alttext.cpython-310.pyc and /dev/null differ diff --git a/restserver/routers/routes_wcag_alttext.py b/restserver/routers/routes_wcag_alttext.py index 4204cf7..15afa9e 100644 --- a/restserver/routers/routes_wcag_alttext.py +++ b/restserver/routers/routes_wcag_alttext.py @@ -24,7 +24,7 @@ class WCAGAltTextValuation(BaseModel): context_levels: int = 5 pixel_distance_threshold: int = 200 number_of_images: int = 10 - save_images: str = "True" + save_images: str = "True" save_elaboration: str = "True" specific_images_urls: List[str] = [] @@ -110,13 +110,12 @@ class WCAGAltTextValuationRoutes: images, openai_model=self.mllm_settings["openai_model"], ) - # Parse MLLM responses + # Parse MLLM responses for i, response in enumerate(mllm_responses): parsed_resp = parse_mllm_alt_text_response(response["mllm_response"]) mllm_responses[i]["mllm_response"] = parsed_resp mllm_responses_object = { - "mllm_model_id": mllm_model_id, "mllm_alttext_assessments": mllm_responses, } @@ -133,6 +132,8 @@ class WCAGAltTextValuationRoutes: db_persistence_insert( connection_db=self.connection_db, insert_type="wcag_alttext_validation", + page_url=json_content["page_url"], + llm_model=mllm_model_id, json_in_str=json_in_str, json_out_str=json_out_str, table="wcag_validator_results",