# wcag_AI_validation/mllm_management.py

from utils import call_API_urlibrequest


class MLLMManager:
    """Build and send chat-style requests to a multimodal LLM endpoint.

    The instance stores the endpoint URL and the API key, assembles the
    request payload, and provides the system/user prompts used to evaluate
    image alternative text against WCAG.
    """

    def __init__(self, end_point: str, api_key: str):
        """Store the connection settings.

        Args:
            end_point: URL the requests are sent to.
            api_key: Secret placed in the ``Authorization: Bearer`` header.
        """
        self.end_point = end_point
        self.api_key = api_key

    def get_response(self, system_prompt, user_prompt, **payload_options):
        """Send one system/user prompt pair to the endpoint.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message (a string, or a list
                of content parts such as the one returned by
                ``get_alt_text_user_prompt``).
            **payload_options: Optional generation settings forwarded
                unchanged to ``create_mllm_payload`` (``temperature``,
                ``top_p``, ``frequency_penalty``, ``presence_penalty``,
                ``max_tokens``, ``stop``). Omit them to get the defaults.

        Returns:
            Whatever ``call_API_urlibrequest`` returns; its exact shape is
            defined in ``utils`` and is not visible from this class.
        """
        payload = self.create_mllm_payload(
            system_prompt, user_prompt, **payload_options
        )
        # Headers are passed as [name, value] pairs — presumably the shape
        # call_API_urlibrequest expects; verify against utils.
        headers = [
            ["Content-Type", "application/json"],
            ["Authorization", f"Bearer {self.api_key}"],
        ]
        return call_API_urlibrequest(
            url=self.end_point, headers=headers, data=payload
        )

    def create_mllm_payload(
        self,
        system_prompt,
        user_prompt,
        *,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        max_tokens=800,
        stop=None,
    ) -> dict:
        """Return the request body for one system/user exchange.

        The generation settings are keyword-only and default to the values
        that were previously hard-coded, so a call with just the two prompts
        yields exactly the same payload as before.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message.
            temperature: Sampling temperature placed in the payload.
            top_p: ``top_p`` value placed in the payload.
            frequency_penalty: ``frequency_penalty`` value for the payload.
            presence_penalty: ``presence_penalty`` value for the payload.
            max_tokens: ``max_tokens`` value for the payload.
            stop: ``stop`` value for the payload; ``None`` by default.

        Returns:
            A dict with a two-element ``messages`` list followed by the
            generation settings.
        """
        return {
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": temperature,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "max_tokens": max_tokens,
            "stop": stop,
        }

    def get_alt_text_system_prompt(self) -> str:
        """Return the system prompt for the alt-text evaluation task.

        The literal is returned verbatim, including the leading spaces on its
        continuation lines, so it is deliberately not re-indented here.
        """
        # NOTE(review): "alterative" in the first sentence looks like a typo
        # for "alternative"; left untouched because editing it changes the
        # text sent to the model — confirm before fixing.
        system_prompt = """You are a web accessibility evaluation tool. Your task is to evaluate if alterative text for
                images on webpages are appropriate according to WCAG guidelines. The alt-text should serve the same purpose and present
                the same information as the image, and should be able to substitute for the non-text content. The text alternative would
                be brief but as informative as possible.

                Follow these instructions carefully:
                1. You will be provided as input with the following:
                - The image found on the webpage.
                - The associated alternative text. When the alt-text is empty or absent, you will be explicitly informed.
                - The surrounding context of the image.
                - The page title, headings and the content of the “keywords” and “description” <meta> tag, if found.

                2. Determine the function and purpose of the image by analyzing these elements. Take into account the purpose and function
                of the associated image by considering the page context. Check also if the image is, or is associated with, a link or a button,
                and consider this in your judgement. If the image contains text use that as part of the context.

                3. Provide a final assessment based on the following:
                - 'success' if you can assess with 'sufficient certainty' the alt-text is appropriate in relation to the image purpose,
                - 'failure' if you can assess with 'sufficient certainty' that the alt-text is NOT appropriate,
                - 'warning' if you cannot determine with 'sufficient certainty'.
                where the level of certainty goes from 1 to 100 and 'sufficient certainty' means > 80

                4. The original alt-text assessment on a scale from 1 to 5, where 5 is the best score. Use an integer number only.

                5. Provide a brief reasoning for your judgment. If the image contains text, write it verbatim. Your response should be in English.

                6. Keep your response within 150 words.

                7. Generate the new most appropriate alt-text given the context and the steps before. Keep this within 30 words.

                8. Here is the JSON format the results must have:
                {"Original alt-text assessment" : "*your original alt-text assessment*", "Assessment" : "*your assessment*", "EvaluationResult": "*your response*", "New alt-text":"*new alt-text*"}"""
        return system_prompt

    def get_alt_text_user_prompt(self, altTextMessage, imageURL, HTMLcontext, pageText) -> list:
        """Return the user message content as an ordered list of parts.

        Args:
            altTextMessage: Text describing the image's current alt-text.
            imageURL: URL of the image to evaluate.
            HTMLcontext: Text with the markup surrounding the image.
            pageText: Text with page-level information.

        Returns:
            Four content parts in this order: alt-text message (text), image
            (``image_url``), HTML context (text), page text (text).
        """
        return [
            {"type": "text", "text": altTextMessage},
            {"type": "image_url", "image_url": {"url": imageURL}},
            {"type": "text", "text": HTMLcontext},
            {"type": "text", "text": pageText},
        ]