import sys
import argparse
import json
import asyncio
from dependences.utils import (
    disclaim_bool_string,
    prepare_output_folder,
    create_folder,
    return_from_env_valid,
)
from datetime import datetime, timezone
from dotenv import load_dotenv, find_dotenv
import os
import warnings

warnings.filterwarnings("ignore")

exception_msg = "Exception: %s"

from dependences.image_extractor import ImageExtractor
from dependences.mllm_management import MLLMManager, parse_mllm_alt_text_response


async def cli(sys_argv):
    env_path = find_dotenv(filename=".env")
    _ = load_dotenv(env_path)  # read the .env file

    parser = argparse.ArgumentParser()

    # USE_OPENAI_MODEL from the environment supplies the default for --openai_model
    openai_model = disclaim_bool_string(return_from_env_valid("USE_OPENAI_MODEL", "False"))

    parser.add_argument(
        "--page_url",
        type=str,
        default="https://www.bbc.com",
        help="URL of the page to analyze",
    )
    parser.add_argument(
        # the env variable above provides the default value; the CLI flag overrides it
        "--openai_model",
        action="store_true",
        default=openai_model,
        help="If passed, align everything with the OpenAI frameworks",
    )
    parser.add_argument(
        "--context_levels",
        type=int,
        default=5,
        help="HTML context levels around the image",
    )
    parser.add_argument(
        "--pixel_distance_threshold",
        type=int,
        default=200,
        help="Pixel distance threshold around the image",
    )
    parser.add_argument(
        "--number_of_images",
        type=int,
        default=10,
        help="Maximum number of images to extract",
    )
    parser.add_argument(
        "--save_elaboration",
        action="store_true",
        default=True,
        help="If set, save the elaborated info to a JSON file",
    )
    parser.add_argument(
        "--save_images",
        action="store_true",
        default=True,
        help="If set, save the extracted images",
    )
    parser.add_argument(
        "--specific_images_urls",
        type=str,
        nargs="*",
        default=[],
        help="Specific image URLs to process (space-separated)",
    )

    # parse the argument vector passed in by the caller
    args = parser.parse_args(sys_argv)
    print("wcag validator args:", args)

    if args.openai_model:
        mllm_end_point = return_from_env_valid("mllm_end_point_openai", "")
        mllm_api_key = return_from_env_valid("mllm_api_key_openai", "")
        mllm_model_id = return_from_env_valid("mllm_model_id_openai", "")
    else:
        mllm_end_point = return_from_env_valid("mllm_end_point_local", "")
        mllm_api_key = return_from_env_valid("mllm_api_key_local", "")
        mllm_model_id = return_from_env_valid("mllm_model_id_local", "")
    print("mllm_end_point:", mllm_end_point)
    print("mllm_model_id:", mllm_model_id)

    images_output_dir = ""
    # argparse store_true flags are already booleans, so they can be checked directly
    if args.save_elaboration or args.save_images:  # if there is something to save
        url_path = args.page_url.replace(":", "").replace("//", "_").replace("/", "_")
        now = datetime.now(timezone.utc)
        now_str = now.strftime("%Y_%m_%d-%H_%M_%S")
        folder_str = mllm_model_id.replace(":", "-") + "_" + now_str
        output_dir = prepare_output_folder(url_path, folder_str)
        if args.save_images:
            images_output_dir = create_folder(
                output_dir, directory_separator="/", next_path="images"
            )
            print("save images path:", images_output_dir)

    ### Task #1: ---------- Image Extractor
    # Create extractor
    image_extractor = ImageExtractor(
        args.page_url,
        context_levels=args.context_levels,
        pixel_distance_threshold=args.pixel_distance_threshold,
        number_of_images=args.number_of_images,
        save_images=args.save_images,
        save_images_path=images_output_dir,
    )

    # Extract images
    print(f"Extracting images from: {args.page_url}")
    images = await image_extractor.extract_images(
        specific_images_urls=args.specific_images_urls
    )

    # Save extracted images info
    if args.save_elaboration:  # Optionally save to JSON
        await image_extractor.save_elaboration(
            images, output_dir=output_dir + "/extracted_images.json"
        )
    print("--- First task completed: extracted images \n")
    # ---------------------------------------------

    ### Task #2: ---------- MLLM management
    mllm_manager = MLLMManager(mllm_end_point, mllm_api_key, mllm_model_id)
    print("mllm_manager.end_point:", mllm_manager.end_point)

    # Make alt text evaluation
    mllm_responses = mllm_manager.make_alt_text_evaluation(
        images,
        openai_model=args.openai_model,
    )

    # Parse MLLM responses
    for i, response in enumerate(mllm_responses):
        parsed_resp = parse_mllm_alt_text_response(response["mllm_response"])
        mllm_responses[i]["mllm_response"] = parsed_resp

    # Save MLLM responses
    if args.save_elaboration:  # Optionally save to JSON
        with open(
            output_dir + "/mllm_alttext_assessments.json", "w", encoding="utf-8"
        ) as f:
            json.dump(mllm_responses, f, indent=2, ensure_ascii=False)
    print("--- Second task completed: MLLM management \n")
    # ------------------------------------------------


if __name__ == "__main__":
    asyncio.run(cli(sys.argv[1:]))
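
# Example invocation (a sketch only: the script filename and the .env values below are
# assumptions for illustration, not part of the original source; the flag and variable
# names come from the parser and return_from_env_valid calls above):
#
#   python wcag_alt_text_cli.py --page_url https://example.com --number_of_images 5
#
# with an .env file along the lines of:
#
#   USE_OPENAI_MODEL=False
#   mllm_end_point_local=http://localhost:8000/v1
#   mllm_api_key_local=dummy-key
#   mllm_model_id_local=some-local-model-id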