# wcag_AI_validation/wcag_validator.py

import sys
import argparse
import json
import asyncio
from utils import disclaim_bool_string, prepare_output_folder, create_folder
from datetime import datetime, timezone
from dotenv import load_dotenv, find_dotenv
import os

import warnings
# Install a blanket "ignore" filter: no category or module is given, so every
# warning raised after this point in the process is suppressed.
warnings.filterwarnings("ignore")

# %-style template for formatting an exception message.
# NOTE(review): not referenced anywhere in the visible part of this file.
exception_msg = "Exception: %s"

from image_extractor import ImageExtractor
from mllm_management import MLLMManager

def _build_arg_parser():
    """Create the argument parser holding every command-line option of the validator."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--page_url",
        type=str,
        help=("Url page to analyze"),
        default="https://www.bbc.com",
    )
    parser.add_argument(
        "--context_levels",
        type=int,
        default=5,
        help=("HTML context levels around the image"),
    )
    parser.add_argument(
        "--pixel_distance_threshold",
        type=int,
        default=200,
        help=("pixel distance threshold around the image"),
    )
    parser.add_argument(
        "--number_of_images",
        type=int,
        default=10,
        help=("max number of desired images"),
    )

    # Both save flags are ``store_true`` with ``default=True``: passing them
    # never changes the value, so on their own they cannot be turned off.
    # The ``--no_*`` companions below write ``False`` to the same destination.
    parser.add_argument(
        "--save_elaboration",
        action="store_true",
        default=True,
        help=("If True save the elaborated info in a json file"),
    )
    parser.add_argument(
        "--no_save_elaboration",
        dest="save_elaboration",
        action="store_false",
        help=("Do not save the elaborated info in a json file"),
    )
    parser.add_argument(
        "--save_images",
        action="store_true",
        default=True,
        help=("If True save the images"),
    )
    parser.add_argument(
        "--no_save_images",
        dest="save_images",
        action="store_false",
        help=("Do not save the images"),
    )
    return parser


def _build_page_text(img_info):
    """Return the page-level part of the prompt: title, meta description and meta keywords."""
    # Page headings are deliberately not included (original note: "skip headings").
    return (
        "Here is the content of the page: Title of the page: " + str(img_info["page_title"])
        + ", content of the <meta name='description'> tag: " + str(img_info["page_description"])
        + ", content of the <meta name='keywords'> tag: " + str(img_info["page_keywords"])
    )


async def cli(sys_argv):
    """Run the alt-text validation pipeline for a single web page.

    The pipeline parses the command-line options, extracts images (with their
    surrounding context) from the page through ``ImageExtractor``, asks the
    MLLM behind ``MLLMManager`` to assess each image's alt-text, and
    optionally writes the extracted data and the MLLM answers to JSON files
    in a per-run output folder.

    The MLLM endpoint and API key are read from the ``mllm_end_point`` and
    ``mllm_api_key`` environment variables after loading a ``.env`` file.

    Args:
        sys_argv: Argument list to parse, without the program name (typically
            ``sys.argv[1:]``). ``None`` makes argparse fall back to
            ``sys.argv[1:]`` on its own.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    # Parse the list handed in by the caller instead of silently re-reading sys.argv.
    args = _build_arg_parser().parse_args(sys_argv)

    print("wcag validator args:", args)

    # Evaluate each flag once. The return type of disclaim_bool_string is not
    # visible from this file, so the original ``== True`` comparison is kept
    # rather than replaced by a plain truthiness test.
    save_elaboration = disclaim_bool_string(args.save_elaboration) == True  # noqa: E712
    save_images = disclaim_bool_string(args.save_images) == True  # noqa: E712

    # Defined up front so later references never hit an unbound local when
    # saving is disabled.
    output_dir = None
    images_output_dir = None

    if save_elaboration or save_images:  # if something to save
        url_path = args.page_url.replace(":", "").replace("//", "_").replace("/", "_")
        now_str = datetime.now(timezone.utc).strftime("%Y_%m_%d-%H_%M_%S")
        output_dir = prepare_output_folder(url_path, now_str)

        if save_images:
            images_output_dir = create_folder(
                output_dir, directory_separator="/", next_path="images"
            )
            print("save images path:", images_output_dir)

    ### Task #1: ----------  Image Extractor
    # NOTE(review): save_images_path is None when images are not saved;
    # presumably ImageExtractor ignores it in that case - confirm.
    image_extractor = ImageExtractor(
        args.page_url,
        context_levels=args.context_levels,
        pixel_distance_threshold=args.pixel_distance_threshold,
        number_of_images=args.number_of_images,
        save_images=args.save_images,
        save_images_path=images_output_dir,
    )
    print(f"Extracting images from: {args.page_url}")
    images = await image_extractor.extract_images()
    if save_elaboration:  # Optionally save to JSON
        await image_extractor.save_elaboration(
            images, output_dir=output_dir + "/extracted_images.json"
        )
    #---------------------------------------------

    ### Task #2: ----------  MLLM management
    env_path = find_dotenv(filename=".env")
    load_dotenv(env_path)  # read .env file

    mllm_end_point = os.getenv("mllm_end_point")
    mllm_api_key = os.getenv("mllm_api_key")
    print("mllm_end_point:", mllm_end_point)

    mllm_manager = MLLMManager(mllm_end_point, mllm_api_key)
    print("mllm_manager.end_point:", mllm_manager.end_point)

    alt_text_system_prompt = mllm_manager.get_alt_text_system_prompt()
    print("alt_text_system_prompt:", alt_text_system_prompt)

    mllm_responses = []
    for img_info in images:
        alt_text = "Here is the alt-text of the image: " + img_info["alt_text"]
        image_url = img_info["url"]
        html_context = (
            "Here is the surrounding HTML context of the element: "
            + img_info["html_context"]
        )
        page_text = _build_page_text(img_info)

        print("Processing image URL:", image_url)
        print("Alt-text:", alt_text)
        print("HTML context:", html_context)
        print("Page text:", page_text)

        alt_text_user_prompt = mllm_manager.get_alt_text_user_prompt(
            altTextMessage=alt_text,
            imageURL=image_url,
            HTMLcontext=html_context,
            pageText=page_text,
        )
        print("alt_text_user_prompt:", alt_text_user_prompt)

        mllm_response = mllm_manager.get_response(
            system_prompt=alt_text_system_prompt,
            user_prompt=alt_text_user_prompt,
        )
        # Keep only the text of the first choice in the response.
        mllm_responses.append(mllm_response['choices'][0]["message"]["content"])

    if save_elaboration:  # Optionally save to JSON
        with open(output_dir + "/mllm_responses.json", "w", encoding="utf-8") as f:
            json.dump(mllm_responses, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    # Script entry point: hand the command-line arguments (without the
    # program name) to the async driver and run it to completion.
    command_line_args = sys.argv[1:]
    asyncio.run(cli(command_line_args))