1033 lines
44 KiB
Python
1033 lines
44 KiB
Python
"""
|
||
WCAG 2.5.8 Target Size (Minimum) checker using Playwright.
|
||
|
||
Extracts all interactive GUI elements from a page and flags those smaller than:
|
||
- 24×24 px → hard minimum (SC 2.5.8 Level AA)
|
||
- 44×44 px → best-practice (SC 2.5.5 Level AAA)
|
||
|
||
For every scroll-viewport that contains at least one violation, two images are
|
||
saved to disk and base64-encoded for LLM forwarding:
|
||
- original_viewport_<NNN>.png raw screenshot (no annotations)
|
||
- annotated_viewport_<NNN>.png same screenshot with coloured circles
|
||
|
||
##########################
|
||
# the code flow is as follows:
|
||
extract_target_size:
|
||
|
||
return {
|
||
"page_url": self.url,
|
||
"small_elements_24": _to_element_records(fails_24), #all small elements in the format [{"tag": str, "xpath":str, "width": float, "height": float,"top": float, "left": float},...]
|
||
"small_elements_44": _to_element_records(fails_44_only),
|
||
"elements_with_24_failure": _to_element_records( #all small elements that intersect by circles
|
||
filtered_small_elements_24
|
||
),
|
||
"elements_with_44_failure": _to_element_records(
|
||
filtered_small_elements_44
|
||
),
|
||
"elements_with_24_failure_target_intersection": _to_element_records(# all small elements that intersect with any target element
|
||
filtered_small_elements_24_with_target_intersection
|
||
),
|
||
"annotated_viewports": annotated_viewports,# array of each scroll_y where there is at least one potential violation
|
||
}
|
||
|
||
|
||
where each entry in annotated_viewports is built in:
|
||
_build_annotated_viewports (foreach scroll_y where there is at least one potential violation):
|
||
|
||
{
|
||
"scroll_y": scroll_y,
|
||
"original_b64": original_b64,
|
||
"annotated_b64": annotated_b64,
|
||
"original_path": original_path,
|
||
"annotated_path": annotated_path,
|
||
"elements_24_count": len(vp_24),  # dimension-based potential failures in the viewport strip (red circles)
|
||
"elements_44_count": len(vp_44_only),  # dimension-based potential failures in the viewport strip (orange circles)
|
||
"elements_24_intersected_count": vp_24_intersected.count(True),#effective failures counter in the viewport strip, where red circles intersect with each other (i.e. multiple small elements clustered together)
|
||
"elements_44_intersected_count": vp_44_intersected.count(True),#effective failures counter in the viewport strip, where orange circles intersect with each other (i.e. multiple small elements clustered together)
|
||
"elements_24_intersected": vp_24_intersected,#effective failures in the viewport strip, where red circles intersect with each other (i.e. multiple small elements clustered together)
|
||
"elements_44_intersected": vp_44_intersected,#effective failures in the viewport strip, where orange circles intersect with each other (i.e. multiple small elements clustered together)
|
||
"elements_24_intersected_with_target_count": vp_24_intersected_with_target.count(True),#effective failures counter in the viewport strip, where red circles intersect with any target element in the viewport (small elements directly adjacent to target elements)
|
||
"elements_24_intersected_with_target": vp_24_intersected_with_target,#effective failures in the viewport strip, where red circles intersect with any target element in the viewport (small elements directly adjacent to target elements)
|
||
|
||
}
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import base64
|
||
import io
|
||
import math
|
||
import re
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
from urllib.parse import urlparse
|
||
import pandas as pd
|
||
import cv2
|
||
import numpy as np
|
||
from PIL import Image
|
||
from playwright.async_api import Page, async_playwright
|
||
import argparse
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Image helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _png_bytes_to_pil(png_bytes: bytes) -> Image.Image:
    """Open PNG-encoded bytes as a PIL image read from an in-memory buffer."""
    stream = io.BytesIO(png_bytes)
    return Image.open(stream)
|
||
|
||
|
||
def _pil_to_png_bytes(img: Image.Image) -> bytes:
    """Serialise *img* in PNG format and return the encoded bytes."""
    with io.BytesIO() as sink:
        img.save(sink, format="PNG")
        return sink.getvalue()
|
||
|
||
|
||
def _encode_pil(img: Image.Image) -> str:
    """Render *img* to PNG and return it as a base64 text string."""
    png_payload = _pil_to_png_bytes(img)
    encoded = base64.b64encode(png_payload)
    return encoded.decode("utf-8")
|
||
|
||
|
||
def _draw_circles_on_bytes(
    png_bytes: bytes,
    elements: List[Dict[str, Any]],
    color_bgr: tuple,
    radius: int = 14,
    thickness: int = 2,
) -> Image.Image:
    """
    Decode *png_bytes*, draw a circle for each element, return a PIL Image.

    Element dicts must have keys: left, top, width, height
    (all in viewport-relative coordinates). Each circle is centred on the
    element's bounding-box centre (truncated to integer pixels).

    Args:
        png_bytes: Encoded screenshot to draw on.
        elements: Elements to mark, one circle per element.
        color_bgr: Circle colour in OpenCV BGR channel order.
        radius: Circle radius in pixels.
        thickness: Stroke thickness in pixels.

    Returns:
        The annotated image converted to RGB.

    Raises:
        ValueError: if *png_bytes* is empty or cannot be decoded.
    """
    arr = np.frombuffer(png_bytes, dtype=np.uint8)
    # imdecode signals an undecodable buffer by returning None (and rejects an
    # empty buffer outright); fail with a clear message instead of an opaque
    # error further down.
    img_cv = cv2.imdecode(arr, cv2.IMREAD_COLOR) if arr.size else None
    if img_cv is None:
        raise ValueError("png_bytes could not be decoded as an image")

    for el in elements:
        cx = int(el["left"] + el["width"] / 2)
        cy = int(el["top"] + el["height"] / 2)
        cv2.circle(img_cv, (cx, cy), radius, color_bgr, thickness)

    # OpenCV works in BGR; PIL expects RGB.
    return Image.fromarray(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))
|
||
|
||
|
||
def _draw_bounding_box_on_bytes(
    png_bytes: bytes,
    elements: List[Dict[str, Any]],
    thickness: int = 2,
) -> Image.Image:
    """
    Decode *png_bytes*, outline each element's bounding box in green.

    Element dicts must have keys: left, top, width, height
    (coordinates are used as-is, truncated to integer pixels).

    Args:
        png_bytes: Encoded screenshot to draw on.
        elements: Elements whose bounding boxes are outlined.
        thickness: Stroke thickness in pixels.

    Returns:
        The annotated image converted to RGB.

    Raises:
        ValueError: if *png_bytes* is empty or cannot be decoded.
    """
    arr = np.frombuffer(png_bytes, dtype=np.uint8)
    # imdecode returns None for undecodable data; surface that explicitly.
    img_cv = cv2.imdecode(arr, cv2.IMREAD_COLOR) if arr.size else None
    if img_cv is None:
        raise ValueError("png_bytes could not be decoded as an image")

    green_bgr = (0, 255, 0)  # green rectangles for target elements
    for target in elements:
        x = int(target["left"])
        y = int(target["top"])
        w = int(target["width"])
        h = int(target["height"])
        cv2.rectangle(img_cv, (x, y), (x + w, y + h), green_bgr, thickness)

    # OpenCV works in BGR; PIL expects RGB.
    return Image.fromarray(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))
|
||
|
||
|
||
def _any_same_color_circles_intersect(
    elements: List[Dict[str, Any]],
    radius: int,
) -> List[bool]:
    """Flag every element whose circle overlaps another circle of the same list.

    A circle of the given `radius` is centred on each element's bounding box.
    Two circles intersect when the distance between their centres is strictly
    less than ``2 * radius`` (touching circles do not count).

    Args:
        elements: Dicts with 'left', 'top', 'width' and 'height' keys.
        radius: Radius shared by all circles.

    Returns:
        A list of booleans, one per element in `elements` (same order).
        Entry i is True if circle i intersects at least one other circle.
        An empty input yields an empty list.
    """
    centers = [
        (el["left"] + el["width"] / 2, el["top"] + el["height"] / 2) for el in elements
    ]
    min_dist_sq = (2 * radius) ** 2
    values_to_return: List[bool] = []

    for i, (x1, y1) in enumerate(centers):
        print(f"Checking circle {i} at {centers[i]} against the rest...")
        intersects = False
        # Compare against every other circle (earlier ones included) so that
        # both members of an intersecting pair get flagged.
        for j, (x2, y2) in enumerate(centers):
            if i == j:
                continue  # skip self-comparison
            dx = x1 - x2
            dy = y1 - y2
            if dx * dx + dy * dy < min_dist_sq:
                print(""" → Circles intersect! """, i, "and", j)
                intersects = True
                break  # one intersection is enough to flag this circle

        if not intersects:
            print(" → No circles intersect.")
        values_to_return.append(intersects)

    return values_to_return
|
||
|
||
|
||
def _any_color_circles_intersect_target(
    elements: List[Dict[str, Any]],
    target: List[Dict[str, Any]],
    radius: int,
) -> List[bool]:
    """Check WCAG 2.2 SC 2.5.8 spacing compliance: circle-to-target intersection.

    Per WCAG 2.2 SC 2.5.8 (Target Size Minimum), a 24 CSS pixel diameter
    circle centered on each undersized target's bounding box must not intersect
    another target's bounding box.

    For each element in `elements`, a circle of the given `radius` is centered
    on its bounding box and checked for intersection against the bounding box
    of each element in `target`. Intersection is determined analytically
    from viewport-relative coordinates, without pixel rendering: the circle
    centre is clamped to the rectangle and the clamped point must lie strictly
    inside the circle.

    Args:
        elements: Undersized annotation elements to check, each with 'left',
            'top', 'width', and 'height' keys (viewport-relative CSS px) and
            optionally 'xpath'.
        target: Target elements to check against, with the same keys.
        radius: Radius in CSS pixels of the compliance circle (typically 12,
            for a 24 CSS px diameter circle per SC 2.5.8).

    Returns:
        A list of booleans of length len(elements). Entry i is True if the
        circle centered on elements[i] intersects the bounding box of at least
        one *other* target element — i.e. that element FAILS the SC 2.5.8
        spacing requirement. False means the element passes.

    Reference:
        https://www.w3.org/TR/WCAG22/#target-size-minimum
    """

    def _is_same_element(a: Dict[str, Any], b: Dict[str, Any]) -> bool:
        # The two lists may overlap (an undersized element is itself a target),
        # so an element must never be compared against itself. XPath is the
        # preferred identity because distinct elements can share coordinates
        # (e.g. stacked buttons); fall back to geometry when it is missing.
        xpath_a, xpath_b = a.get("xpath"), b.get("xpath")
        if xpath_a is not None and xpath_b is not None:
            return xpath_a == xpath_b
        return all(a[k] == b[k] for k in ("left", "top", "width", "height"))

    values_to_return: List[bool] = []
    centers = [
        (el["left"] + el["width"] / 2, el["top"] + el["height"] / 2) for el in elements
    ]
    radius_sq = radius**2

    for i, (x1, y1) in enumerate(centers):
        print(f"Checking circle {i} at {centers[i]} against targets...")
        intersects_any_target = False

        for j, t in enumerate(target):
            if _is_same_element(elements[i], t):
                continue

            # Clamp circle center to the target bounding box
            closest_x = max(t["left"], min(x1, t["left"] + t["width"]))
            closest_y = max(t["top"], min(y1, t["top"] + t["height"]))

            dx = x1 - closest_x
            dy = y1 - closest_y
            if dx * dx + dy * dy < radius_sq:
                print(
                    f" → FAIL: circle {i} intersects target {j} bounding box left:{t['left']}, top:{t['top']}"
                )
                intersects_any_target = True
                break  # one intersection is enough to fail the spacing requirement

        values_to_return.append(intersects_any_target)
        if not intersects_any_target:
            print(f" → PASS: circle {i} does not intersect any target.")

    return values_to_return
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# JS that collects ALL interactive elements in a single evaluate() call
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
_COLLECT_ELEMENTS_JS = """
|
||
() => {
|
||
|
||
|
||
|
||
|
||
const INTERACTIVE_SELECTOR = [
|
||
"select", "textarea", "datalist", "output", "meter", "progress",
|
||
"details", "summary", "menu", "menuitem",
|
||
"button",
|
||
"input[type='button']", "input[type='submit']", "input[type='reset']",
|
||
"input[type='checkbox']", "input[type='radio']",
|
||
"input[type='search']",
|
||
"a[href]", "option",
|
||
//"[role='img']", #images are decorative, "never" SC 2.5.8 targets
|
||
"[role='button']",
|
||
"[role='checkbox']", "[role='gridcell']",
|
||
"[role='link']", "[role='menuitem']",
|
||
"[role='menuitemcheckbox']", "[role='menuitemradio']",
|
||
"[role='option']", "[role='radio']",
|
||
"[role='searchbox']", "[role='switch']",
|
||
"[role='tab']", "[role='treeitem']",
|
||
].join(", ");
|
||
|
||
// Tags that are purely decorative and should be resolved to their interactive parent
|
||
|
||
const DECORATIVE_CLASS_RE = /icon/i;
|
||
|
||
const SELECTOR = INTERACTIVE_SELECTOR + ', i, svg, [class*="icon"]';
|
||
|
||
|
||
function isInlineText(el) {
|
||
// WCAG 2.5.8 inline exception: "The target is in a sentence."
|
||
//
|
||
// An element is in a sentence when its direct siblings (within the same
|
||
// parent) include non-whitespace text nodes — i.e. it is literally
|
||
// embedded in running prose, not just placed inside a block wrapper.
|
||
|
||
const parent = el.parentElement;
|
||
if (!parent) return false;
|
||
|
||
for (const node of parent.childNodes) {
|
||
if (node === el) continue;
|
||
if (node.nodeType === Node.TEXT_NODE && node.textContent.trim().length > 0) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
|
||
function isVisible(el) {
|
||
const s = window.getComputedStyle(el);
|
||
if (s.display === "none" || s.visibility === "hidden" ||
|
||
parseFloat(s.opacity) === 0) return false;
|
||
const r = el.getBoundingClientRect();
|
||
return r.width > 0 && r.height > 0;
|
||
}
|
||
|
||
function getXPath(node) {
|
||
if (node.id) return `//*[@id="${node.id}"]`;
|
||
if (node === document.body) return "/html/body";
|
||
const parent = node.parentElement;
|
||
if (!parent) return "";
|
||
const siblings = [...parent.children].filter(c => c.tagName === node.tagName);
|
||
const idx = siblings.indexOf(node) + 1;
|
||
const step = siblings.length > 1
|
||
? `${node.tagName.toLowerCase()}[${idx}]`
|
||
: node.tagName.toLowerCase();
|
||
return `${getXPath(parent)}/${step}`;
|
||
}
|
||
|
||
// Walk up the DOM to find the nearest interactive ancestor (or self)
|
||
function resolveTarget(el) {
|
||
const tag = el.tagName.toUpperCase();
|
||
|
||
// <i> and <svg> are always decorative regardless of class
|
||
const alwaysDecorative = tag === "I" || tag === "SVG" || tag === "IMG";
|
||
|
||
// <span> is decorative only when it carries an icon class and nothing more
|
||
const spanIcon = tag === "SPAN" &&
|
||
DECORATIVE_CLASS_RE.test(el.className?.toString?.() ?? "");
|
||
|
||
if (!alwaysDecorative && !spanIcon) return el; // interactive, keep as-is
|
||
|
||
let cursor = el.parentElement;
|
||
while (cursor && cursor !== document.body) {
|
||
if (cursor.matches(INTERACTIVE_SELECTOR)) return cursor;
|
||
cursor = cursor.parentElement;
|
||
}
|
||
|
||
return null; // decorative with no interactive parent → discard
|
||
}
|
||
|
||
const seen = new Set();
|
||
const results = [];
|
||
|
||
document.querySelectorAll(SELECTOR).forEach(el => {
|
||
// Resolve decorative elements (i, svg, icons) to their interactive parent
|
||
const target = resolveTarget(el);
|
||
|
||
if (!target) return; // ← decorative with no interactive parent
|
||
if (seen.has(target)) return; // ← already processed
|
||
if (!isVisible(target)) return;
|
||
if (isInlineText(target)) return; // ← WCAG 2.5.8 inline exception: "The target is in a sentence."
|
||
|
||
seen.add(target);
|
||
|
||
const r = target.getBoundingClientRect();
|
||
results.push({
|
||
tag: target.outerHTML.slice(0, 300),
|
||
xpath: getXPath(target),
|
||
width: r.width,
|
||
height: r.height,
|
||
top: r.top + window.scrollY,
|
||
left: r.left + window.scrollX
|
||
});
|
||
});
|
||
|
||
return results;
|
||
}
|
||
"""
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Output-directory helper
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _make_output_dir(base_dir: str, url: str) -> Path:
    """
    Create (if needed) and return the run folder
    <base_dir>/<sanitised-hostname>_<YYYYMMDD_HHMMSS>/.

    The hostname is taken from *url*; "unknown" is used when it has none.
    Characters other than word characters, dots and dashes become "_".
    """
    raw_host = urlparse(url).hostname
    if not raw_host:
        raw_host = "unknown"
    safe_host = re.sub(r"[^\w.-]", "_", raw_host)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    run_dir = Path(base_dir).joinpath(f"{safe_host}_{stamp}")
    run_dir.mkdir(parents=True, exist_ok=True)
    return run_dir
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Pure helper functions
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _elements_in_strip(elements: List[Dict], top_abs: int, bot_abs: int) -> List[Dict]:
    """Select the elements whose vertical centre lies in [top_abs, bot_abs)."""
    selected = []
    for candidate in elements:
        centre_y = candidate["top"] + candidate["height"] / 2
        if top_abs <= centre_y < bot_abs:
            selected.append(candidate)
    return selected
|
||
|
||
|
||
def _to_viewport_coords(elements: List[Dict], scroll_y: int) -> List[Dict]:
    """Return copies of *elements* with "top" made relative to the viewport.

    Only "top" is shifted (by *scroll_y*); all other fields are copied as-is
    and the input dicts are left untouched.
    """
    shifted = []
    for el in elements:
        moved = dict(el)
        moved["top"] = el["top"] - scroll_y
        shifted.append(moved)
    return shifted
|
||
|
||
|
||
def _to_element_records(elements: List[Dict]) -> List[Dict[str, Any]]:
    """Project each element onto the fields reported to the LLM (no images)."""
    kept_fields = ("tag", "xpath", "width", "height", "top", "left")
    return [{name: el[name] for name in kept_fields} for el in elements]
|
||
|
||
async def _dismiss_cookie_banner(page: Page, timeout: int = 3_000) -> None:
    """
    Try to accept/close cookie banners using multiple strategies.

    Strategies are attempted in order and the function returns after the first
    one that clicks something:
      1. known framework-specific selectors,
      2. accept-like button text inside cookie/consent-looking containers
         (or <body> when no such container exists),
      3. the Usercentrics shadow-DOM accept button,
      4. accept-like text on any visible button/link, clicked from page JS.

    Silently continues if nothing is found — never raises. Every strategy is
    deliberately best-effort, hence the broad exception handlers.

    Args:
        page: Playwright page to operate on.
        timeout: Milliseconds passed to the visibility checks and clicks.
    """

    # ── Strategy 1: known framework-specific selectors ────────────────────────
    # Ordered by specificity: most precise first
    KNOWN_SELECTORS = [
        # OneTrust
        "#onetrust-accept-btn-handler",
        # Cookiebot
        "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
        # Didomi
        "#didomi-notice-agree-button",
        # iubenda
        ".iubenda-cs-accept-btn",
        # Quantcast
        ".qc-cmp2-summary-buttons button:first-child",
        # Usercentrics (shadow DOM — handled in strategy 3)
        # TrustArc
        ".truste_overlay .pdynamicbutton .call",
        # Generic patterns
        "#accept-cookies",
        "#cookie-accept",
        "#btn-accept-cookies",
        ".cookie-accept",
        ".accept-cookies",
        ".js-accept-cookies",
    ]

    for selector in KNOWN_SELECTORS:
        try:
            btn = page.locator(selector).first
            if await btn.is_visible(timeout=timeout):
                await btn.click(timeout=timeout)
                await page.wait_for_timeout(500)  # let the banner animate out
                print(f"Dismissed cookie banner using selector: {selector}","strategy 1")
                return
        except Exception:
            continue

    # ── Strategy 2: text-based heuristic on visible buttons ──────────────────
    ACCEPT_TEXTS = re.compile(
        r"\b(accept|accetta|accetto|agree|allow|consent|ok\b|got it|"
        r"capito|continua|proceed|acconsento|accepter|akzeptieren|autoriser|"
        r"alle akzeptieren|tout accepter)\b",
        re.IGNORECASE,
    )

    # Candidate containers — searched when present; <body> is used only when
    # no candidate container exists at all.
    COOKIE_ROOTS = (
        "[id*='cookie'],[id*='consent'],[id*='gdpr'],[id*='onetrust'],"
        "[id*='cookiebot'],[id*='didomi'],[id*='cmp'],[id*='iubenda'],"
        "[class*='cookie'],[class*='consent'],[class*='gdpr'],"
        "[class*='banner'],[class*='notice'],[class*='popup']"
    )

    try:
        roots = await page.locator(COOKIE_ROOTS).all()
        search_scope = roots if roots else [page.locator("body")]

        for scope in search_scope:
            buttons = await scope.locator("button, a[href='#'], [role='button']").all()
            for btn in buttons:
                try:
                    text = (await btn.inner_text(timeout=500)).strip()
                    if ACCEPT_TEXTS.search(text) and await btn.is_visible(timeout=500):
                        await btn.click(timeout=timeout)
                        await page.wait_for_timeout(500)
                        print(f"Dismissed cookie banner using text heuristic: '{text}'","strategy 2")
                        return
                except Exception:
                    continue
    except Exception:
        pass

    # ── Strategy 3: Usercentrics / shadow-DOM banners ─────────────────────────
    try:
        result = await page.evaluate("""
            () => {
                // Usercentrics uses a shadow root on <uc-ui-container>
                const host = document.querySelector('uc-ui-container, #usercentrics-root');
                if (!host?.shadowRoot) return false;
                const btn = host.shadowRoot.querySelector(
                    'button[data-testid="uc-accept-all-button"]'
                );
                if (btn) { btn.click(); return true; }
                return false;
            }
        """)
        if result:
            await page.wait_for_timeout(500)
            print("Dismissed cookie banner using Usercentrics shadow DOM heuristic","strategy 3")
            return
    except Exception:
        pass

    # ── Strategy 4: inject and click via JS as last resort ───────────────────
    # NOTE: the regex literal must stay on ONE line. A JavaScript regex literal
    # cannot contain a line break; splitting it makes the whole script a syntax
    # error, which the handler below would swallow, silently disabling this
    # strategy. "\\b" is a Python escape that reaches the browser as "\b".
    try:
        clicked = await page.evaluate("""
            () => {
                const ACCEPT_RE = /\\b(accept|accetta|agree|allow|consent|ok|acconsento|accepter|akzeptieren)\\b/i;
                const candidates = [
                    ...document.querySelectorAll('button, [role="button"], a')
                ];
                for (const el of candidates) {
                    const text = el.innerText?.trim() ?? "";
                    const rect = el.getBoundingClientRect();
                    const visible = rect.width > 0 && rect.height > 0;
                    if (visible && ACCEPT_RE.test(text)) {
                        el.click();
                        return true;
                    }
                }
                return false;
            }
        """)
        if clicked:
            print("Dismissed cookie banner using injected JS heuristic","strategy 4")
            await page.wait_for_timeout(500)
    except Exception:
        pass
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Main extractor
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TargetSizeExtractor:
|
||
"""
|
||
WCAG 2.5.8 / 2.5.5 target-size checker powered by Playwright.
|
||
|
||
Parameters
|
||
----------
|
||
url : page to audit
|
||
output_dir : root folder where a timestamped sub-directory is created
|
||
to hold original and annotated viewport PNGs.
|
||
Pass ``None`` to skip disk persistence entirely.
|
||
viewport_width : browser viewport width (default 1440)
|
||
viewport_height : browser viewport height (default 900)
|
||
"""
|
||
|
||
THRESHOLD_MIN = 24 # SC 2.5.8 AA
|
||
THRESHOLD_ENHANCED = 44 # SC 2.5.5 AAA / best-practice
|
||
|
||
# OpenCV BGR colours
|
||
COLOR_24 = (0, 0, 255) # red – hard failure (< 24 px)
|
||
COLOR_44 = (0, 128, 255) # orange – soft failure (≥ 24 px, < 44 px)
|
||
|
||
def __init__(
|
||
self,
|
||
url: str,
|
||
output_dir: Optional[str] = "wcag_output",
|
||
viewport_width: int = 1440,
|
||
viewport_height: int = 900,
|
||
):
|
||
self.url = url
|
||
self.output_dir = output_dir
|
||
self.viewport_width = viewport_width
|
||
self.viewport_height = viewport_height
|
||
|
||
# ------------------------------------------------------------------
|
||
# Public API
|
||
# ------------------------------------------------------------------
|
||
|
||
async def extract_target_size(self) -> Dict[str, Any]:
    """
    Audit the page and return a result dict.

    ``self.output_dir`` is used as-is (no timestamped sub-directory is
    created here); when it is set, the element lists are also written to it
    as ``;``-separated CSV files.

    Return shape
    ------------
    {
        "page_url": str,

        # Elements failing the 24 px hard minimum — no images here
        "small_elements_24": [
            {"tag": str, "xpath": str, "width": float, "height": float,
             "top": float, "left": float},
            ...
        ],

        # Elements passing 24 px but failing the 44 px best-practice
        "small_elements_44": [ ...same shape... ],

        # Subsets whose circle intersects another circle of the same kind
        "elements_with_24_failure": [ ...same shape... ],
        "elements_with_44_failure": [ ...same shape... ],

        # 24 px failures whose circle intersects another target's box
        "elements_with_24_failure_target_intersection": [ ...same shape... ],

        # One entry per viewport strip with ≥1 violation
        "annotated_viewports": [
            {
                "scroll_y": int,
                "original_b64": str,   # raw PNG, base64
                "annotated_b64": str,  # circles drawn, base64
                "original_path": str | None,   # disk path
                "annotated_path": str | None,  # disk path
                "elements_24_count": int,
                "elements_44_count": int,
                ...
            },
            ...
        ],
    }

    On any failure the error is printed and ``{"error": str}`` is returned
    instead; the browser is always closed.
    """
    out_dir = self.output_dir if self.output_dir else None

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)

        try:
            if out_dir is not None:
                # The CSV export below writes straight into this folder.
                Path(out_dir).mkdir(parents=True, exist_ok=True)

            page = await browser.new_page(
                viewport={
                    "width": self.viewport_width,
                    "height": self.viewport_height,
                }
            )
            await page.goto(self.url, timeout=50_000, wait_until="domcontentloaded")

            # ── Auto-dismiss cookie banners ──────────────────────────────
            await _dismiss_cookie_banner(page)

            # 1. Single JS round-trip → all element geometries
            all_elements: List[Dict[str, Any]] = await page.evaluate(
                _COLLECT_ELEMENTS_JS
            )

            # 2. Classify by threshold. Elements with a negative top are
            #    dropped: they are likely off-screen or mis-measured and
            #    would not fall into any viewport strip.
            t_min, t_enh = self.THRESHOLD_MIN, self.THRESHOLD_ENHANCED
            fails_24 = [
                el
                for el in all_elements
                if (el["width"] < t_min or el["height"] < t_min)
                and el["top"] >= 0
            ]
            fails_44_only = [
                el
                for el in all_elements
                if (el["width"] >= t_min and el["height"] >= t_min)
                and (el["width"] < t_enh or el["height"] < t_enh)
                and el["top"] >= 0
            ]

            # 3. Screenshot + annotate per viewport strip, including the
            #    per-strip circle-intersection flags.
            annotated_viewports = await self._build_annotated_viewports(
                page, all_elements, fails_24, fails_44_only, out_dir
            )

            # 4. Keep only the elements flagged as effective failures.
            #    Each viewport's flag list is ordered like the elements of
            #    that strip, NOT like the page-wide lists, so the strip is
            #    recomputed and zipped with its flags. Indexing the
            #    page-wide list with the concatenated flags mis-assigns
            #    them whenever DOM order differs from vertical order or an
            #    element belongs to no strip.
            def _flagged(candidates: List[Dict], flags_key: str) -> List[Dict]:
                picked: List[Dict] = []
                for vp in annotated_viewports:
                    strip_top = vp["scroll_y"]
                    in_strip = _elements_in_strip(
                        candidates, strip_top, strip_top + self.viewport_height
                    )
                    picked.extend(
                        el for el, hit in zip(in_strip, vp[flags_key]) if hit
                    )
                return picked

            filtered_small_elements_24 = _flagged(
                fails_24, "elements_24_intersected"
            )
            filtered_small_elements_44 = _flagged(
                fails_44_only, "elements_44_intersected"
            )
            filtered_small_elements_24_with_target_intersection = _flagged(
                fails_24, "elements_24_intersected_with_target"
            )

            print("directly from extract_target_size() results:")
            print(
                f"Total elements with verified 24×24 px failure (red circles intersecting) : {len(filtered_small_elements_24)}"
            )
            print(
                f"Total elements with verified 44×44 px failure (orange circles intersecting) : {len(filtered_small_elements_44)}"
            )
            print(
                f"Total elements with verified 24×24 px failure intersecting with any target element in the viewport : {len(filtered_small_elements_24_with_target_intersection)}"
            )
            print()

            if out_dir is not None:
                # Also save the element lists to CSV for easier inspection.
                csv_tables = {
                    "all_interactive_elements.csv": all_elements,
                    "small_elements_24.csv": fails_24,
                    "small_elements_44.csv": fails_44_only,
                    "small_elements_24_filtered.csv": filtered_small_elements_24,
                    "small_elements_44_filtered.csv": filtered_small_elements_44,
                    "small_elements_24_filtered_with_target_intersection.csv": (
                        filtered_small_elements_24_with_target_intersection
                    ),
                }
                for filename, rows in csv_tables.items():
                    pd.DataFrame(_to_element_records(rows)).to_csv(
                        str(Path(out_dir) / filename), index=False, sep=";"
                    )

            # 5. Element records
            return {
                "page_url": self.url,
                "small_elements_24": _to_element_records(fails_24),
                "small_elements_44": _to_element_records(fails_44_only),
                "elements_with_24_failure": _to_element_records(
                    filtered_small_elements_24
                ),
                "elements_with_44_failure": _to_element_records(
                    filtered_small_elements_44
                ),
                "elements_with_24_failure_target_intersection": _to_element_records(
                    filtered_small_elements_24_with_target_intersection
                ),
                "annotated_viewports": annotated_viewports,
            }

        except Exception as exc:
            # Top-level boundary: report the failure to the caller as data.
            print(f"[TargetSizeExtractor] error: {exc}")
            return {"error": str(exc)}

        finally:
            await browser.close()
|
||
|
||
# ------------------------------------------------------------------
|
||
# Internal – viewport loop
|
||
# ------------------------------------------------------------------
|
||
|
||
async def _build_annotated_viewports(
|
||
self,
|
||
page: Page,
|
||
all_elements: List[Dict],
|
||
fails_24: List[Dict],
|
||
fails_44_only: List[Dict],
|
||
out_dir: Optional[Path],
|
||
) -> List[Dict[str, Any]]:
|
||
"""
|
||
Scroll through the full page height one viewport at a time.
|
||
|
||
For each strip that contains ≥1 violation:
|
||
• capture the raw screenshot → original
|
||
• draw orange circles (44-only violations) ↗
|
||
• draw red circles (24 violations) on top → annotated
|
||
• save both PNGs to out_dir (if set)
|
||
• base64-encode both for the return value
|
||
|
||
Only strips with at least one violation are included in the result.
|
||
"""
|
||
total_height: int = await page.evaluate("document.body.scrollHeight")
|
||
vh = self.viewport_height
|
||
num_viewports = math.ceil(total_height / vh)
|
||
results: List[Dict[str, Any]] = []
|
||
|
||
for i in range(num_viewports):
|
||
scroll_y = i * vh
|
||
top_abs = scroll_y
|
||
bot_abs = scroll_y + vh
|
||
|
||
vp_24 = _elements_in_strip(fails_24, top_abs, bot_abs)
|
||
print(
|
||
f"Viewport {i:03d} scroll_y={scroll_y:5d}px fails_24 in strip: {len(vp_24)}"
|
||
)
|
||
vp_44_only = _elements_in_strip(fails_44_only, top_abs, bot_abs)
|
||
|
||
#vp_all_rest = _elements_in_strip(
|
||
# [el for el in all_elements if el not in vp_24], top_abs, bot_abs
|
||
#) # all elements in the strip that are not failing 24 px, to check if circles intersect with any of them as well. Only for success criteria 24 px, as 44 px is a best practice and not a hard failure
|
||
vp_all = _elements_in_strip(all_elements, top_abs, bot_abs)# all elements in the strip, to check if circles intersect with any of them as well
|
||
|
||
if not vp_24 and not vp_44_only:
|
||
print(f" [viewport {i:03d}] scroll_y={scroll_y:5d}px no violations")
|
||
continue # no violations in this strip, skip
|
||
|
||
# Scroll to position and capture raw screenshot
|
||
await page.evaluate(f"window.scrollTo(0, {scroll_y})")
|
||
await page.wait_for_timeout(150) # let layout settle
|
||
raw_png: bytes = await page.screenshot(full_page=False)
|
||
|
||
# Convert absolute document coords → viewport-relative for drawing
|
||
vp_24_rel = _to_viewport_coords(vp_24, scroll_y)
|
||
vp_44_only_rel = _to_viewport_coords(vp_44_only, scroll_y)
|
||
|
||
#vp_all_rel = _to_viewport_coords(vp_all_rest, scroll_y)
|
||
vp_all_rel = _to_viewport_coords(vp_all, scroll_y)
|
||
|
||
annotated_img = _draw_bounding_box_on_bytes(raw_png, vp_all_rel)
|
||
|
||
# Draw orange first (larger radius) → red on top
|
||
annotated_img = _draw_circles_on_bytes(
|
||
_pil_to_png_bytes(annotated_img),
|
||
vp_44_only_rel,
|
||
self.COLOR_44,
|
||
radius=self.THRESHOLD_ENHANCED // 2, # 22 px
|
||
)
|
||
annotated_img = _draw_circles_on_bytes(
|
||
_pil_to_png_bytes(annotated_img),
|
||
vp_24_rel,
|
||
self.COLOR_24,
|
||
radius=self.THRESHOLD_MIN // 2, # 12 px
|
||
)
|
||
|
||
print(
|
||
f" [viewport check intersections {i:03d}] scroll_y={scroll_y:5d}px "
|
||
f"fails_24={len(vp_24_rel):3d} fails_44={len(vp_44_only_rel):3d} "
|
||
f"checking circle intersections..."
|
||
)
|
||
print(f" - {len(vp_24_rel)} red circles potential (fails 24 px)")
|
||
vp_24_intersected = _any_same_color_circles_intersect(
|
||
vp_24_rel, self.THRESHOLD_MIN // 2
|
||
)
|
||
print(
|
||
f" - red circles effective (fails 24 px)",
|
||
vp_24_intersected.count(True),
|
||
vp_24_intersected,
|
||
)
|
||
|
||
print(
|
||
f" - {len(vp_44_only)} orange circles potential (fails 44 px only)"
|
||
)
|
||
vp_44_intersected = _any_same_color_circles_intersect(
|
||
vp_44_only_rel, self.THRESHOLD_ENHANCED // 2
|
||
)
|
||
print(
|
||
f" - orange circles effective (fails 44 px only)",
|
||
vp_44_intersected.count(True),
|
||
vp_44_intersected,
|
||
)
|
||
|
||
# Additionally, check if any circles (of any color) intersect with the target elements in the viewport to flag them as effective failures
|
||
print(
|
||
f"- Start checking if any 24 px circles intersect with any target element in the viewport to flag them as effective failures..."
|
||
)
|
||
vp_24_intersected_with_target = _any_color_circles_intersect_target(
|
||
vp_24_rel, vp_all_rel, self.THRESHOLD_MIN // 2
|
||
)
|
||
# vp_44_intersected_with_target=_any_color_circles_intersect_target(vp_44_only_rel, vp_all_rel, self.THRESHOLD_ENHANCED // 2)# this check is not strictly necessary as 44 px
|
||
print(
|
||
f" - red circles intersecting with any target element in the viewport (fails 24 px) ",
|
||
vp_24_intersected_with_target.count(True),
|
||
vp_24_intersected_with_target,
|
||
)
|
||
# print(f" - orange circles intersecting with any target element in the viewport (fails 44 px only) ", vp_44_intersected_with_target.count(True),vp_44_intersected_with_target)
|
||
|
||
# Encode
|
||
original_b64 = base64.b64encode(raw_png).decode("utf-8")
|
||
annotated_b64 = _encode_pil(annotated_img)
|
||
|
||
# Persist to disk
|
||
original_path = None
|
||
annotated_path = None
|
||
if out_dir is not None:
|
||
original_path = str(out_dir / f"original_viewport_{i:03d}.png")
|
||
annotated_path = str(out_dir / f"annotated_viewport_{i:03d}.png")
|
||
_png_bytes_to_pil(raw_png).save(original_path)
|
||
annotated_img.save(annotated_path)
|
||
print(
|
||
f" [viewport {i:03d}] scroll_y={scroll_y:5d}px "
|
||
f"potential fails_24={len(vp_24):3d} potential fails_44={len(vp_44_only):3d} "
|
||
f"effective fails_24 circles intersections ={ vp_24_intersected.count(True):3d} effective fails_44={vp_44_intersected.count(True):3d} "
|
||
f"effective fails_24 circles intersecting with any target element in the viewport ={ vp_24_intersected_with_target.count(True):3d} "
|
||
f"→ {out_dir.name}/"
|
||
)
|
||
|
||
results.append(
|
||
{
|
||
"scroll_y": scroll_y,
|
||
"original_b64": original_b64,
|
||
"annotated_b64": annotated_b64,
|
||
"original_path": original_path,
|
||
"annotated_path": annotated_path,
|
||
"elements_24_count": len(vp_24),
|
||
"elements_44_count": len(vp_44_only),
|
||
"elements_24_intersected_count": vp_24_intersected.count(True),
|
||
"elements_44_intersected_count": vp_44_intersected.count(True),
|
||
"elements_24_intersected": vp_24_intersected,
|
||
"elements_44_intersected": vp_44_intersected,
|
||
"elements_24_intersected_with_target_count": vp_24_intersected_with_target.count(
|
||
True
|
||
),
|
||
"elements_24_intersected_with_target": vp_24_intersected_with_target,
|
||
}
|
||
)
|
||
|
||
return results
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# CLI test
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
async def main(args) -> None:
    """Run the target-size check at several viewport sizes and print a summary.

    For each resolution a ``TargetSizeExtractor`` is created with its own
    ``viewport_<W>x<H>`` sub-directory under the directory returned by
    ``_make_output_dir("wcag_output", url)``. A resolution whose result
    contains an ``"error"`` key is reported and skipped; the remaining
    resolutions are still processed and summarised.

    Args:
        args: Parsed CLI namespace; only ``args.page_url`` is read.
    """
    url = args.page_url
    base_output_dir = "wcag_output"
    # Check multiple resolutions by running the extractor once per viewport
    # size (phone, tablet, desktop).
    viewport_resolutions = [(390, 844), (768, 1024), (1920, 1080)]
    output_dir = _make_output_dir(base_output_dir, url)

    # Each result is stored together with the resolution that produced it so
    # the summary label stays correct even when some resolutions fail.
    total_results = []
    for resolution in viewport_resolutions:
        viewport_width, viewport_height = resolution
        child_dir = f"viewport_{viewport_width}x{viewport_height}"
        output_dir_resolution = Path(output_dir) / child_dir
        output_dir_resolution.mkdir(parents=True, exist_ok=True)
        print(f"\nRunning TargetSizeExtractor demo for URL:{url}, viewport {viewport_width}x{viewport_height} saving in output_dir {output_dir_resolution}...")

        extractor = TargetSizeExtractor(
            url,
            output_dir=output_dir_resolution,
            viewport_width=viewport_width,
            viewport_height=viewport_height,
        )
        result = await extractor.extract_target_size()

        if "error" in result:
            # Do not abort the whole run: earlier successful resolutions must
            # still be summarised and later ones still attempted.
            print("Error:", result["error"])
            continue
        total_results.append((resolution, result))

    for resolution, result in total_results:
        print("Results for viewport_resolutions:", resolution)
        vps = result["annotated_viewports"]
        print()
        print(f"URL : {result['page_url']}")
        print(f"Elements < 24×24 px : {len(result['small_elements_24'])}")
        print(f"Elements 24–44 px range : {len(result['small_elements_44'])}")
        print(f"Elements with verified 24×24 px failure (red circles intersecting) : {len(result['elements_with_24_failure'])}")
        print(f"Elements with verified 44×44 px failure (orange circles intersecting) : {len(result['elements_with_44_failure'])}")
        print(f"Elements with verified 24×24 px failure intersecting with any target element in the viewport : {len(result['elements_with_24_failure_target_intersection'])}")
        print(f"Viewports with violations : {len(vps)}")
        print()

        # One line per scroll position that contains at least one violation.
        for vp_i, vp in enumerate(vps):
            print(
                f" Problematic Viewport {vp_i} scroll_y={vp['scroll_y']:5d}px "
                f"potential fails_24={vp['elements_24_count']} "
                f"potential fails_44={vp['elements_44_count']}"
                f" effective fails_24={vp['elements_24_intersected_count']} "
                f" effective fails_44={vp['elements_44_intersected_count']}"
                f" effective fails_24 intersecting with any target element in the viewport={vp['elements_24_intersected_with_target_count']}"
            )

        print()
|
||
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: read the page URL from the command line (falling
    # back to the default below) and drive the async checker to completion.
    cli_parser = argparse.ArgumentParser()
    url_option = {
        "type": str,
        "help": "Url page to analyze",
        "default": "https://www.bbc.com",
    }
    cli_parser.add_argument("--page_url", **url_option)
    args = cli_parser.parse_args()
    asyncio.run(main(args))
|