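"""CLI entry point for the WCAG alt-text validator.

Extracts images (with their surrounding HTML context) from a web page, then asks a
multimodal LLM (MLLM) to assess each image's alt text. The extracted image info and
the MLLM assessments can optionally be saved as JSON files in a per-run output folder.

Configuration is read from a .env file: USE_OPENAI_MODEL selects between the
mllm_*_openai and mllm_*_local endpoint/credential variables.
"""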
import sys
import argparse
import json
import asyncio
from dependences.utils import (
    disclaim_bool_string,
    prepare_output_folder,
    create_folder,
    return_from_env_valid,
)
from datetime import datetime, timezone
from dotenv import load_dotenv, find_dotenv
import os

import warnings

warnings.filterwarnings("ignore")

exception_msg = "Exception: %s"

from dependences.image_extractor import ImageExtractor
from dependences.mllm_management import MLLMManager, parse_mllm_alt_text_response

async def cli(sys_argv):

    env_path = find_dotenv(filename=".env")
    _ = load_dotenv(env_path)  # read the .env file

    parser = argparse.ArgumentParser()

    # The USE_OPENAI_MODEL env variable provides the default for the --openai_model flag
    if disclaim_bool_string(return_from_env_valid("USE_OPENAI_MODEL", "False")) == True:
        openai_model = True
    else:
        openai_model = False

    parser.add_argument(
        "--page_url",
        type=str,
        help=("URL of the page to analyze"),
        default="https://www.bbc.com",
    )

    parser.add_argument(  # the env variable sets the default value; the CLI flag can override it
        "--openai_model",
        action="store_true",
        default=openai_model,
        help=("If passed, everything is aligned with the OpenAI framework"),
    )

    parser.add_argument(
        "--context_levels",
        type=int,
        default=5,
        help=("HTML context levels around the image"),
    )
    parser.add_argument(
        "--pixel_distance_threshold",
        type=int,
        default=200,
        help=("Pixel distance threshold around the image"),
    )
    parser.add_argument(
        "--number_of_images",
        type=int,
        default=10,
        help=("Max number of desired images"),
    )

    parser.add_argument(
        "--save_elaboration",
        action="store_true",
        default=True,
        help=("If True, save the elaborated info to a JSON file"),
    )

    parser.add_argument(
        "--save_images",
        action="store_true",
        default=True,
        help=("If True, save the images"),
    )

    parser.add_argument(
        "--specific_images_urls",
        type=str,
        nargs="*",
        default=[],
        help=("Specific image URLs to process (space-separated)"),
    )

    # Parse the arguments passed in by the caller (sys.argv[1:] when run as a script)
    args = parser.parse_args(sys_argv)

    print("wcag validator args:", args)

    # Select the MLLM endpoint, API key and model id from the environment
    if args.openai_model:
        mllm_end_point = return_from_env_valid("mllm_end_point_openai", "")
        mllm_api_key = return_from_env_valid("mllm_api_key_openai", "")
        mllm_model_id = return_from_env_valid("mllm_model_id_openai", "")
    else:
        mllm_end_point = return_from_env_valid("mllm_end_point_local", "")
        mllm_api_key = return_from_env_valid("mllm_api_key_local", "")
        mllm_model_id = return_from_env_valid("mllm_model_id_local", "")

    print("mllm_end_point:", mllm_end_point)
    print("mllm_model_id:", mllm_model_id)

    images_output_dir = ""
    if (
        disclaim_bool_string(args.save_elaboration) == True
        or disclaim_bool_string(args.save_images) == True
    ):  # if there is something to save
        # Per-run output folder named after the page URL, the model id and a UTC timestamp
        url_path = args.page_url.replace(":", "").replace("//", "_").replace("/", "_")
        now = datetime.now(timezone.utc)
        now_str = now.strftime("%Y_%m_%d-%H_%M_%S")
        folder_str = mllm_model_id.replace(":", "-") + "_" + now_str
        output_dir = prepare_output_folder(url_path, folder_str)

        if disclaim_bool_string(args.save_images) == True:
            images_output_dir = create_folder(
                output_dir, directory_separator="/", next_path="images"
            )
            print("save images path:", images_output_dir)

    ### Task #1: ---------- Image Extractor
    # Create extractor
    image_extractor = ImageExtractor(
        args.page_url,
        context_levels=args.context_levels,
        pixel_distance_threshold=args.pixel_distance_threshold,
        number_of_images=args.number_of_images,
        save_images=args.save_images,
        save_images_path=images_output_dir,
    )
    # Extract images
    print(f"Extracting images from: {args.page_url}")
    images = await image_extractor.extract_images(
        specific_images_urls=args.specific_images_urls
    )

    # save extracted images info
    if disclaim_bool_string(args.save_elaboration) == True:  # Optionally save to JSON
        await image_extractor.save_elaboration(
            images, output_dir=output_dir + "/extracted_images.json"
        )

    print("--- First task completed: extracted images \n")
    # ---------------------------------------------

    ### Task #2: ---------- MLLM management
    mllm_manager = MLLMManager(mllm_end_point, mllm_api_key, mllm_model_id)
    print("mllm_manager.end_point:", mllm_manager.end_point)

    # Make alt text evaluation
    mllm_responses = mllm_manager.make_alt_text_evaluation(
        images,
        openai_model=args.openai_model,
    )

    # Parse MLLM responses
    for i, response in enumerate(mllm_responses):
        parsed_resp = parse_mllm_alt_text_response(response["mllm_response"])
        mllm_responses[i]["mllm_response"] = parsed_resp

    # save mllm responses
    if disclaim_bool_string(args.save_elaboration) == True:  # Optionally save to JSON
        with open(
            output_dir + "/mllm_alttext_assessments.json", "w", encoding="utf-8"
        ) as f:
            json.dump(mllm_responses, f, indent=2, ensure_ascii=False)

    print("--- Second task completed: MLLM management \n")
    # ------------------------------------------------

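# Example invocations (the script name below is only a placeholder):
#   python cli.py --page_url https://www.bbc.com --number_of_images 5
#   python cli.py --page_url https://www.bbc.com --openai_model \
#       --specific_images_urls https://example.com/a.jpg https://example.com/b.jpg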
if __name__ == "__main__":
    asyncio.run(cli(sys.argv[1:]))