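"""CLI entry point for the WCAG alt-text validator.

Extracts images (with their surrounding HTML context) from a web page, then asks a
multimodal LLM (MLLM) to assess each image's alt text. The extracted image info and
the MLLM assessments can optionally be saved as JSON files in a per-run output folder.

Configuration is read from a .env file: USE_OPENAI_MODEL selects between the
mllm_*_openai and mllm_*_local endpoint/credential variables.
"""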
import sys
import argparse
import json
import asyncio
from dependences.utils import (
    disclaim_bool_string,
    prepare_output_folder,
    create_folder,
    return_from_env_valid,
)
from datetime import datetime, timezone
from dotenv import load_dotenv, find_dotenv
import os

import warnings

warnings.filterwarnings("ignore")

exception_msg = "Exception: %s"

from dependences.image_extractor import ImageExtractor
from dependences.mllm_management import MLLMManager, parse_mllm_alt_text_response

async def cli(sys_argv):

    env_path = find_dotenv(filename=".env")
    _ = load_dotenv(env_path)  # read the .env file

    parser = argparse.ArgumentParser()

    # The USE_OPENAI_MODEL env variable provides the default for the --openai_model flag
    if disclaim_bool_string(return_from_env_valid("USE_OPENAI_MODEL", "False")) == True:
        openai_model = True
    else:
        openai_model = False

    parser.add_argument(
        "--page_url",
        type=str,
        help=("URL of the page to analyze"),
        default="https://www.bbc.com",
    )

    parser.add_argument(  # the env variable sets the default value; the CLI flag can override it
        "--openai_model",
        action="store_true",
        default=openai_model,
        help=("If passed, everything is aligned with the OpenAI framework"),
    )

    parser.add_argument(
        "--context_levels",
        type=int,
        default=5,
        help=("HTML context levels around the image"),
    )
    parser.add_argument(
        "--pixel_distance_threshold",
        type=int,
        default=200,
        help=("Pixel distance threshold around the image"),
    )
    parser.add_argument(
        "--number_of_images",
        type=int,
        default=10,
        help=("Max number of desired images"),
    )

    parser.add_argument(
        "--save_elaboration",
        action="store_true",
        default=True,
        help=("If True, save the elaborated info to a JSON file"),
    )

    parser.add_argument(
        "--save_images",
        action="store_true",
        default=True,
        help=("If True, save the images"),
    )

    parser.add_argument(
        "--specific_images_urls",
        type=str,
        nargs="*",
        default=[],
        help=("Specific image URLs to process (space-separated)"),
    )

    # Parse the arguments passed in by the caller (sys.argv[1:] when run as a script)
    args = parser.parse_args(sys_argv)

    print("wcag validator args:", args)

    # Select the MLLM endpoint, API key and model id from the environment
    if args.openai_model:
        mllm_end_point = return_from_env_valid("mllm_end_point_openai", "")
        mllm_api_key = return_from_env_valid("mllm_api_key_openai", "")
        mllm_model_id = return_from_env_valid("mllm_model_id_openai", "")
    else:
        mllm_end_point = return_from_env_valid("mllm_end_point_local", "")
        mllm_api_key = return_from_env_valid("mllm_api_key_local", "")
        mllm_model_id = return_from_env_valid("mllm_model_id_local", "")

    print("mllm_end_point:", mllm_end_point)
    print("mllm_model_id:", mllm_model_id)

    images_output_dir = ""
    if (
        disclaim_bool_string(args.save_elaboration) == True
        or disclaim_bool_string(args.save_images) == True
    ):  # if there is something to save
        # Per-run output folder named after the page URL, the model id and a UTC timestamp
        url_path = args.page_url.replace(":", "").replace("//", "_").replace("/", "_")
        now = datetime.now(timezone.utc)
        now_str = now.strftime("%Y_%m_%d-%H_%M_%S")
        folder_str = mllm_model_id.replace(":", "-") + "_" + now_str
        output_dir = prepare_output_folder(url_path, folder_str)

        if disclaim_bool_string(args.save_images) == True:
            images_output_dir = create_folder(
                output_dir, directory_separator="/", next_path="images"
            )
            print("save images path:", images_output_dir)

    ### Task #1: ---------- Image Extractor
    # Create extractor
    image_extractor = ImageExtractor(
        args.page_url,
        context_levels=args.context_levels,
        pixel_distance_threshold=args.pixel_distance_threshold,
        number_of_images=args.number_of_images,
        save_images=args.save_images,
        save_images_path=images_output_dir,
    )
    # Extract images
    print(f"Extracting images from: {args.page_url}")
    images = await image_extractor.extract_images(
        specific_images_urls=args.specific_images_urls
    )

    # save extracted images info
    if disclaim_bool_string(args.save_elaboration) == True:  # Optionally save to JSON
        await image_extractor.save_elaboration(
            images, output_dir=output_dir + "/extracted_images.json"
        )

    print("--- First task completed: extracted images \n")
    # ---------------------------------------------

    ### Task #2: ---------- MLLM management
    mllm_manager = MLLMManager(mllm_end_point, mllm_api_key, mllm_model_id)
    print("mllm_manager.end_point:", mllm_manager.end_point)

    # Make alt text evaluation
    mllm_responses = mllm_manager.make_alt_text_evaluation(
        images,
        openai_model=args.openai_model,
    )

    # Parse MLLM responses
    for i, response in enumerate(mllm_responses):
        parsed_resp = parse_mllm_alt_text_response(response["mllm_response"])
        mllm_responses[i]["mllm_response"] = parsed_resp

    # save mllm responses
    if disclaim_bool_string(args.save_elaboration) == True:  # Optionally save to JSON
        with open(
            output_dir + "/mllm_alttext_assessments.json", "w", encoding="utf-8"
        ) as f:
            json.dump(mllm_responses, f, indent=2, ensure_ascii=False)

    print("--- Second task completed: MLLM management \n")
    # ------------------------------------------------

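# Example invocations (the script name below is only a placeholder):
#   python cli.py --page_url https://www.bbc.com --number_of_images 5
#   python cli.py --page_url https://www.bbc.com --openai_model \
#       --specific_images_urls https://example.com/a.jpg https://example.com/b.jpg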
if __name__ == "__main__":
    asyncio.run(cli(sys.argv[1:]))