"""Command-line entry point: extract images from a web page and query an MLLM about their alt-text."""

import argparse
import asyncio
import json
import os
import sys
import warnings
from datetime import datetime, timezone

from dotenv import load_dotenv, find_dotenv

from utils import disclaim_bool_string, prepare_output_folder, create_folder

# Installed before the remaining project imports, as in the original file order.
warnings.filterwarnings("ignore")

exception_msg = "Exception: %s"

from image_extractor import ImageExtractor
from mllm_management import MLLMManager


def _build_arg_parser():
    """Return the argument parser describing the command-line options of this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--page_url",
        type=str,
        help=("Url page to analyze"),
        default="https://www.bbc.com",
    )
    parser.add_argument(
        "--context_levels",
        type=int,
        default=5,
        help=("HTML context levels around the image"),
    )
    parser.add_argument(
        "--pixel_distance_threshold",
        type=int,
        default=200,
        help=("pixel distance threshold around the image"),
    )
    parser.add_argument(
        "--number_of_images",
        type=int,
        default=10,
        help=("max number of desired images"),
    )
    # NOTE: with action="store_true" and default=True the parsed value is True
    # whether or not the flag is given on the command line.
    parser.add_argument(
        "--save_elaboration",
        action="store_true",
        default=True,
        help=("If True save the elaborated info in a json file"),
    )
    parser.add_argument(
        "--save_images",
        action="store_true",
        default=True,
        help=("If True save the images"),
    )
    return parser


async def cli(sys_argv):
    """Run the image-extraction and MLLM alt-text pipeline for one page.

    Steps performed:
      1. Parse the command-line options.
      2. When something has to be saved, prepare a timestamped output folder
         (and an ``images`` sub-folder when images are saved).
      3. Extract the images of ``--page_url`` with ``ImageExtractor`` and,
         optionally, save the extraction result as ``extracted_images.json``.
      4. Read ``mllm_end_point`` / ``mllm_api_key`` from the environment
         (after loading a ``.env`` file), query the MLLM once per image and,
         optionally, save the collected answers as ``mllm_responses.json``.

    Args:
        sys_argv: list of command-line arguments (without the program name).
            ``None`` makes argparse fall back to ``sys.argv[1:]``.
    """
    parser = _build_arg_parser()
    # Parse the arguments handed to this function instead of silently
    # re-reading sys.argv, so the ``sys_argv`` parameter is actually honoured.
    args = parser.parse_args(sys_argv)

    print("wcag validator args:", args)

    # The helper's return type is not visible from this file, so the original
    # explicit comparison with True is kept; each flag is evaluated only once.
    save_elaboration = disclaim_bool_string(args.save_elaboration) == True  # noqa: E712
    save_images = disclaim_bool_string(args.save_images) == True  # noqa: E712

    # Always bound, so later uses cannot raise NameError when nothing is saved.
    output_dir = None
    images_output_dir = None

    if save_elaboration or save_images:  # if something to save
        url_path = args.page_url.replace(":", "").replace("//", "_").replace("/", "_")
        now_str = datetime.now(timezone.utc).strftime("%Y_%m_%d-%H_%M_%S")
        output_dir = prepare_output_folder(url_path, now_str)

        if save_images:
            images_output_dir = create_folder(
                output_dir, directory_separator="/", next_path="images"
            )
            print("save images path:", images_output_dir)

    ### Task #1: ---------- Image Extractor
    # Create extractor
    image_extractor = ImageExtractor(
        args.page_url,
        context_levels=args.context_levels,
        pixel_distance_threshold=args.pixel_distance_threshold,
        number_of_images=args.number_of_images,
        save_images=args.save_images,
        save_images_path=images_output_dir,
    )

    # Extract images
    print(f"Extracting images from: {args.page_url}")
    images = await image_extractor.extract_images()

    if save_elaboration:  # Optionally save to JSON
        await image_extractor.save_elaboration(
            images, output_dir=output_dir + "/extracted_images.json"
        )
    # ---------------------------------------------

    ### Task #2: ---------- MLLM management
    env_path = find_dotenv(filename=".env")
    _ = load_dotenv(env_path)  # read .env file
    mllm_end_point = os.getenv("mllm_end_point")
    mllm_api_key = os.getenv("mllm_api_key")
    print("mllm_end_point:", mllm_end_point)

    mllm_manager = MLLMManager(mllm_end_point, mllm_api_key)
    print("mllm_manager.end_point:", mllm_manager.end_point)

    alt_text_system_prompt = mllm_manager.get_alt_text_system_prompt()
    print("alt_text_system_prompt:", alt_text_system_prompt)

    mllm_responses = []
    for img_info in images:
        alt_text = "Here is the alt-text of the image: " + img_info["alt_text"]
        image_URL = img_info["url"]
        HTML_context = (
            "Here is the surrounding HTML context of the element: "
            + img_info["html_context"]
        )
        # NOTE(review): the two labels below are identical although they
        # introduce the page description and the page keywords respectively;
        # confirm whether tag names are missing from these strings.
        page_text = (
            "Here is the content of the page: Title of the page: "
            + str(img_info["page_title"])
        )
        page_text = page_text + ", content of the tag: " + str(img_info["page_description"])
        page_text = page_text + ", content of the tag: " + str(img_info["page_keywords"])
        # skip headings

        print("Processing image URL:", image_URL)
        print("Alt-text:", alt_text)
        print("HTML context:", HTML_context)
        print("Page text:", page_text)

        alt_text_user_prompt = mllm_manager.get_alt_text_user_prompt(
            altTextMessage=alt_text,
            imageURL=image_URL,
            HTMLcontext=HTML_context,
            pageText=page_text,
        )
        print("alt_text_user_prompt:", alt_text_user_prompt)

        mllm_response = mllm_manager.get_response(
            system_prompt=alt_text_system_prompt, user_prompt=alt_text_user_prompt
        )
        # Keep only the text content of the first choice in the response.
        mllm_responses.append(mllm_response['choices'][0]["message"]["content"])

    if save_elaboration:  # Optionally save to JSON
        with open(output_dir + "/mllm_responses.json", "w", encoding="utf-8") as f:
            json.dump(mllm_responses, f, indent=2, ensure_ascii=False)


if __name__ == '__main__':
    asyncio.run(cli(sys.argv[1:]))