# Source listing metadata: 85 lines, 2.6 KiB, Python
from fastapi import APIRouter, Request
|
|
from fastapi.responses import JSONResponse
|
|
import logging
|
|
from pydantic import BaseModel
|
|
import json
|
|
from typing import Dict, List
|
|
from datetime import datetime, timezone
|
|
|
|
from dependences.utils import (
|
|
disclaim_bool_string,
|
|
prepare_output_folder,
|
|
create_folder,
|
|
db_persistence_insert,
|
|
)
|
|
from dependences.image_extractor import ImageExtractor
|
|
from dependences.mllm_management import MLLMManager, parse_mllm_alt_text_response
|
|
|
|
# Error text logged and returned (HTTP 400) when a json.JSONDecodeError is caught.
invalid_json_input_msg = "Invalid JSON format"
# Error text logged and returned (HTTP 500) when any other exception is caught.
unexpected_error_msg = "Unexpected Error: could not end the process"
|
|
|
|
|
|
class ExtractImages(BaseModel):
    """Request body accepted by the ``/extract_images`` endpoint."""

    # URL of the web page whose images should be extracted.
    page_url: str = "https://www.bbc.com"
    # Forwarded to the image extractor as its ``number_of_images`` argument.
    number_of_images: int = 10
|
|
|
|
|
|
class ExtractImagesRoutes:
    """Own the router that serves the POST ``/extract_images`` endpoint.

    The configured ``APIRouter`` is exposed as ``self.router``.
    """

    def __init__(self):
        """Create the router and register the ``/extract_images`` route on it."""
        self.router = APIRouter()

        self.router.add_api_route(
            "/extract_images",
            self.extract_images,
            methods=["POST"],
            tags=["Basic Elaboration"],
            description="extract images from a webpage",
            name="Extract images and context",
            dependencies=[],
        )

        logging.info("extract images routes correctly initialized.")

    async def extract_images(
        self, request: Request, data: ExtractImages
    ) -> JSONResponse:
        """Extract images from the requested web page and return them as JSON.

        Args:
            request: The incoming request; not read by this handler.
            data: Validated request body carrying ``page_url`` and
                ``number_of_images``.

        Returns:
            A ``JSONResponse`` with:
            * status 200 and ``{"images": ...}`` holding whatever
              ``ImageExtractor.extract_images`` returned;
            * status 400 and ``{"error": ...}`` if a ``json.JSONDecodeError``
              is raised while handling the request;
            * status 500 and ``{"error": ...}`` for any other exception.
        """
        logging.info("Received extract images request.")
        try:
            # ``data`` has already been parsed and validated, so its fields are
            # read directly instead of round-tripping through a JSON string.
            page_url = data.page_url

            # Context extraction is disabled for this endpoint: no context
            # levels, no pixel threshold, and nothing is written to disk.
            # NOTE(review): ``save_images`` is passed as the string "False",
            # exactly as before -- presumably the extractor parses it; confirm.
            image_extractor = ImageExtractor(
                page_url,
                context_levels=0,
                pixel_distance_threshold=0,
                number_of_images=data.number_of_images,
                save_images="False",
                save_images_path="",
            )

            logging.info("Extracting images from: %s", page_url)
            images = await image_extractor.extract_images(extract_context=False)

            return JSONResponse(content={"images": images}, status_code=200)

        except json.JSONDecodeError:
            # Kept for backward compatibility: a decode error surfacing from
            # the extraction step is still reported to the client as a 400.
            logging.error(invalid_json_input_msg)
            return JSONResponse(
                content={"error": invalid_json_input_msg}, status_code=400
            )

        except Exception as e:
            # Top-level boundary: log with traceback and hide details from
            # the client behind a generic 500 response.
            logging.exception("%s %s", unexpected_error_msg, e)
            return JSONResponse(
                content={"error": unexpected_error_msg}, status_code=500
            )
|