def associate_user_with_manager(users, user_assignment_manager):
    """Register every known user with the assignment manager.

    Parameters
    ----------
    users : dict
        Mapping of username -> hashed password; only the keys (usernames)
        are used.
    user_assignment_manager : object
        Manager exposing ``register_active_users(list_of_user_ids)``
        (e.g. a ``UserAssignmentManager`` instance).
    """
    # Iterating a dict yields its keys; materialize once instead of the
    # original's dict_keys view converted to list twice.
    user_list = list(users)
    print(f"registering--Associating users with manager: {user_list}")
    user_assignment_manager.register_active_users(user_list)
**Expands if needed** - Runs AssignmentSystem Round 2+ mode when new users exceed current capacity +3. **Stores mappings** - Saves all user-to-site-images mappings in SQLite database +4. **Maintains files** - Keeps JSON/XLSX files updated with same names (no versioning needed) +5. **Tracks history** - Logs all expansion events for auditing + +## Files in this Package + +### Core System Files + +| File | Purpose | +|------|---------| +| `user_assignment_manager.py` | Main manager class - handles all assignment logic | +| `user_registration_integration.py` | Flask/FastAPI integration examples and UserService wrapper | +| `alt_text_assignment_target_overlap_multiple_round.py` | Assignment algorithm (existing - DO NOT MODIFY) | + +### Configuration & Data Files + +| File | Purpose | +|------|---------| +| `sites_config.json` | Configuration of websites and images per user | +| `alt_text_assignments_output_target_overlap.json` | Current assignments (updated by system) | +| `alt_text_assignments_output_target_overlap.xlsx` | Excel export of assignments (updated) | +| `wcag_validator_ui.db` | SQLite database with user-assignment mappings (auto-created) | + +### Documentation Files + +| File | Purpose | +|------|---------| +| `README.md` | This file - overview and quick reference | +| `QUICK_START.md` | Step-by-step integration guide with code examples | +| `USER_ASSIGNMENT_GUIDE.txt` | Comprehensive documentation (500+ lines) | + +### Testing & Examples + +| File | Purpose | +|------|---------| +| `test_user_assignment_manager.py` | Test suite to verify functionality | + +## Quick Usage + +### Installation + +```bash +pip install openpyxl +``` + +### Initialization + +```python +from user_assignment_manager import UserAssignmentManager + +manager = UserAssignmentManager( + db_path="path/to/wcag_validator_ui.db", + config_json_path="sites_config.json", + assignments_json_path="alt_text_assignments_output_target_overlap.json", + 
assignments_xlsx_path="alt_text_assignments_output_target_overlap.xlsx" +) +``` + +### Register Users (with auto-expansion) + +```python +# When users activate, call this with ALL active users +active_users = ["user1", "user2", "user3", "user4", "user5"] +assignments = manager.register_active_users(active_users) + +# If count exceeds previous capacity: +# - AssignmentSystem runs Round 2+ mode +# - New assignments generated +# - JSON/XLSX files updated +# - Database populated +``` + +### Get User Assignments + +```python +user_assignments = manager.get_user_assignments("user1") +# Returns: {"https://site1.com": [1, 2, 3, 4, 5, 6], ...} +``` + +### Check Statistics + +```python +stats = manager.get_statistics() +history = manager.get_generation_history() +``` + +## Database Schema + +### Table: `user_assignments` +Stores user-to-image-assignment mappings +- `user_id` - Unique user identifier +- `site_url` - Website URL +- `image_indices` - JSON array of image numbers +- `created_at` / `updated_at` - Timestamps + +### Table: `assignment_generation_log` +Tracks expansion events +- `generation_round` - Round number (1, 2, 3, ...) 
+- `users_before` - User count before expansion +- `users_after` - User count after expansion +- `new_users_added` - How many new users in this round +- `generated_at` - When expansion occurred + +## Integration Points + +### Option 1: Direct API Usage +```python +manager.register_active_users(user_list) +assignments = manager.get_user_assignments(user_id) +``` + +### Option 2: Flask Integration +```python +from user_registration_integration import get_flask_blueprint + +app.register_blueprint( + get_flask_blueprint(manager), + url_prefix="/api/assignments" +) +# Endpoints: GET /api/assignments/user/, POST /api/assignments/activate +``` + +### Option 3: FastAPI Integration +```python +from user_registration_integration import create_fastapi_router + +app.include_router( + create_fastapi_router(manager), + prefix="/api/assignments" +) +``` + +### Option 4: UserService Wrapper +```python +from user_registration_integration import UserService + +service = UserService(manager) +service.on_users_activated(["user1", "user2", ...]) +``` + +## How Auto-Expansion Works + +### Before +``` +Managed users: 15 +Current JSON has assignments for: user1 to user15 +``` + +### User Registration Event +```python +# New users activate +manager.register_active_users(["user1", ..., "user15", "user16", "user17", "user18"]) +# Count: 18 users > 15 managed → Expansion triggered! +``` + +### Round 2+ Execution +1. **Load**: Read existing assignments from XLSX (user1-user15) +2. **Generate**: Create new assignments for user16-user18 using AssignmentSystem +3. **Ensure**: Maintain target_overlap >= 2 images between users per site +4. **Save**: Write ALL assignments to JSON/XLSX (same filenames) +5. **Persist**: Insert new users into SQLite database +6. 
**Log**: Record expansion event in assignment_generation_log table
At any time: Check statistics + └─ Total users managed: 30 + └─ Coverage per site: 30 users each + └─ Expansion history: 3 events tracked +``` + +## Key Features + +### ✓ Transparent Scaling +Users don't care about capacity - system handles it automatically + +### ✓ Maintains Constraints +Respects `target_overlap` parameter for inter-rater reliability + +### ✓ Backward Compatible +Uses same JSON/XLSX format as original `alt_text_assignment_target_overlap.py` + +### ✓ Auditable +Complete history of all expansions with timestamps + +### ✓ Predictable Behavior +Uses same AssignmentSystem algorithm for consistency + +### ✓ Web-Ready +Flask/FastAPI examples provided for easy integration + +## Common Tasks + +### Get current managed user count +```python +count = manager.get_managed_user_count() +``` + +### Get all user IDs in system +```python +all_users = manager.get_all_user_ids() +``` + +### Check if user has assignments +```python +assignments = manager.get_user_assignments(user_id) +if assignments: + print("User has assignments") +else: + print("User needs to be registered") +``` + +### Monitor expansion history +```python +history = manager.get_generation_history() +for event in history: + print(f"Round {event['generation_round']}: " + f"{event['users_before']}→{event['users_after']} users") +``` + +### Export current statistics +```python +import json +stats = manager.get_statistics() +print(json.dumps(stats, indent=2)) +``` + +## Configuration + +### Default Paths (relative to script location) +``` +Database: ../persistence/wcag_validator_ui.db +Config JSON: sites_config.json +Assignments: alt_text_assignments_output_target_overlap.json +Excel Export: alt_text_assignments_output_target_overlap.xlsx +``` + +### Custom Paths +```python +manager = UserAssignmentManager( + db_path="/custom/db.db", + config_json_path="/custom/config.json", + assignments_json_path="/custom/out.json", + assignments_xlsx_path="/custom/out.xlsx" +) +``` + +### Parameters +```python 
+target_overlap=2 # Min images shared between users per site +seed=42 # Random seed for reproducibility +``` + +## Testing + +### Run test suite +```bash +python test_user_assignment_manager.py +``` + +### Expected output +- All initialization tests pass +- Database schema created +- Existing assignments loaded +- Auto-expansion triggered (if user count > current) +- Statistics retrieved +- History logged + +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| `ModuleNotFoundError: openpyxl` | `pip install openpyxl` | +| `No assignments found for user` | Call `register_active_users()` with user in list | +| `Round 2+ not triggered` | Check that new user count > current managed count | +| `Database locked` | SQLite handles this. Check file permissions. | +| `File not found` | Verify config paths are correct and readable | + +See `USER_ASSIGNMENT_GUIDE.txt` section on TROUBLESHOOTING for more details. + +## Documentation + +- **Quick Start**: [QUICK_START.md](QUICK_START.md) - Step-by-step integration with examples +- **Full Guide**: [USER_ASSIGNMENT_GUIDE.txt](USER_ASSIGNMENT_GUIDE.txt) - Comprehensive reference (500+ lines) +- **API Reference**: [user_assignment_manager.py](user_assignment_manager.py) - Docstrings and type hints +- **Examples**: [user_registration_integration.py](user_registration_integration.py) - Flask/FastAPI examples + +## Architecture + +``` +┌─────────────────────────────────────┐ +│ User Registration System │ +│ (Your Flask/FastAPI/Django app) │ +└────────────┬────────────────────────┘ + │ + └─ register_active_users([user_ids]) + │ + ┌──────────▼───────────┐ + │ UserAssignmentManager│ + ├──────────────────────┤ + │ • Check capacity │ + │ • Trigger expansion │ + │ • Persist to DB │ + │ • Update files │ + │ • Log events │ + └──────────┬───────────┘ + │ + ┌───────────────┼───────────────┐ + │ │ │ + ▼ ▼ ▼ + SQLite DB JSON File XLSX File + user_assignments + generation_log assignments Excel GUI +``` + +## Performance + +- **User 
lookup**: O(1) via indexed SQLite query +- **Registration**: O(n) where n = number of active users +- **Expansion**: O(n) for Round 2+ generation (background operation) +- **Database size**: ~10-20KB per 1000 users + +## Support + +For questions or issues: + +1. Check [USER_ASSIGNMENT_GUIDE.txt](USER_ASSIGNMENT_GUIDE.txt) TROUBLESHOOTING section +2. Review [QUICK_START.md](QUICK_START.md) examples +3. Run `python test_user_assignment_manager.py` to verify setup +4. Check database directly: `sqlite3 wcag_validator_ui.db` + +## Version + +- **Current**: 1.0 (2025-02-07) +- **Python**: 3.7+ +- **Dependencies**: openpyxl + +## Next Steps + +1. Install openpyxl: `pip install openpyxl` +2. Read [QUICK_START.md](QUICK_START.md) for integration steps +3. Run test suite to verify setup +4. Integrate `register_active_users()` in your user auth flow +5. Start using `get_user_assignments()` to retrieve tasks + +--- + +**Created**: February 7, 2025 +**Updated**: February 7, 2025 +**Status**: Ready for integration diff --git a/UI/user_task_assignment/alt_text_assignment_target_overlap_multiple_round.py b/UI/user_task_assignment/alt_text_assignment_target_overlap_multiple_round.py new file mode 100644 index 0000000..dd1cb1e --- /dev/null +++ b/UI/user_task_assignment/alt_text_assignment_target_overlap_multiple_round.py @@ -0,0 +1,966 @@ +# create user assignments with a hard target_overlap constraint, then export to XLSX/JSON +# round 1: python alt_text_assignment_target_overlap_multiple_round.py --config_json sites_config.json --n_users 15 --output_xlsx alt_text_assignments_output_target_overlap.xlsx --output_json alt_text_assignments_output_target_overlap.json +# round >1: python alt_text_assignment_target_overlap_multiple_round.py --config_json sites_config.json --n_users 5 --output_xlsx alt_text_assignments_output_target_overlap_round2.xlsx --output_json alt_text_assignments_output_target_overlap_round2.json --input_assignments alt_text_assignments_output_target_overlap.xlsx 
+ +""" +Alt-Text Validation User Test Assignment System +================================================ +Assigns image subsets to evaluators with a trade-off between: + - Coverage : each image gets seen by as many users as needed + - Inter-rater : every user is guaranteed to share >= target_overlap + images with at least one other user per site + (HARD constraint, not a soft nudge) + - Load balance : image view counts are kept as even as possible + +Algorithm +--------- +For each new user and each site: + + Phase 1 — HARD FILTER + If there are already users assigned, keep only blocks that share + >= target_overlap images with at least one existing user. + (First user is exempt — no existing users to overlap with.) + If no block passes the filter (pool too small), fall back to the + best available block and emit a warning. + + Phase 2 — SCORE among surviving candidates + score = w_new * (images with zero views) + + w_load * (-average view count in block) ← load balance + Overlap is no longer in the score — it is enforced by the filter. + +Parameters +---------- +target_overlap : int + MINIMUM number of images a new user must share with at least one + existing user on each site. This is now a hard constraint. + Typical values: 2–4. Set to 0 to disable. 
+ +Usage example +------------- + python alt_text_assignment_target_overlap_multiple_round.py --config_json sites_config.json --n_users 5 --target_overlap 2 + +# first round: start fresh with no previous assignments +python alt_text_assignment_target_overlap_multiple_round.py \ + --config_json sites.json \ + --n_users 5 \ + --output_xlsx round1_output.xlsx + +# second round: load previous assignments to continue building on top +python aalt_text_assignment_target_overlap_multiple_round.py \ + --input_assignments round1_output.xlsx \ + --config_json sites.json \ + --n_users 5 \ + --output_xlsx round2_output.xlsx + +""" + +import json +import random +import argparse +from pathlib import Path +from collections import defaultdict +from typing import Dict, List, Optional, Tuple + +import openpyxl +from openpyxl.styles import PatternFill, Font, Alignment, Border, Side +from openpyxl.utils import get_column_letter + + +# ───────────────────────────────────────────────────────────────────────────── +# Data model +# ───────────────────────────────────────────────────────────────────────────── + + +class SiteConfig: + """ + Configuration for a single website. + + Parameters + ---------- + url : str + URL of the web page being evaluated. + allowed_images : list of int + Explicit list of usable image indices for this site. + Gaps are fine: e.g. [1, 3, 5, 7, 10, 12]. + You can also pass a contiguous range via list(range(1, n+1)). + images_per_user : int + How many images each evaluator is assigned on this site. + """ + + def __init__(self, url: str, allowed_images: List[int], images_per_user: int = 6): + if not allowed_images: + raise ValueError(f"Site '{url}': allowed_images must not be empty.") + if images_per_user > len(allowed_images): + raise ValueError( + f"Site '{url}': images_per_user ({images_per_user}) " + f"exceeds allowed pool size ({len(allowed_images)})." 
+ ) + self.url = url + self.allowed_images: List[int] = sorted(set(allowed_images)) + self.images_per_user = images_per_user + + @property + def total_images(self) -> int: + """Number of usable images (size of the allowed pool).""" + return len(self.allowed_images) + + +# ───────────────────────────────────────────────────────────────────────────── +# Assignment system +# ───────────────────────────────────────────────────────────────────────────── + + +class AssignmentSystem: + """ + Manages image assignments across users and sites. + + Parameters + ---------- + sites : list of SiteConfig + target_overlap : int + HARD minimum number of images a new user must share with at + least one existing user per site. Set 0 to disable. + seed : int + Random seed for reproducibility. + """ + + def __init__( + self, + sites: List[SiteConfig], + target_overlap: int = 2, + seed: int = 42, + ): + self.sites = {s.url: s for s in sites} + self.target_overlap = target_overlap + self.seed = seed + random.seed(seed) + + # assignments[user_id][site_url] = sorted list of image indices + self.assignments: Dict[str, Dict[str, List[int]]] = {} + + # Warnings emitted when the hard constraint could not be satisfied + self.warnings: List[str] = [] + + # Pre-build the sliding-window block catalogue for each site + self._blocks: Dict[str, List[List[int]]] = {} + self._build_blocks() + + # Feasibility check: warn if pool geometry makes the constraint impossible + self._check_feasibility() + + # ── Block construction ──────────────────────────────────────────────────── + + def _build_blocks(self): + """ + Build all candidate blocks for each site using a sliding window + of size images_per_user moved by step=1. + + Using step=1 generates the maximum number of distinct blocks, + giving the assignment algorithm the most freedom to simultaneously + satisfy the hard overlap constraint and the load-balance objective. 
+ """ + for url, cfg in self.sites.items(): + images = cfg.allowed_images + k = cfg.images_per_user + blocks = [] + for start in range(len(images) - k + 1): + blocks.append(images[start : start + k]) + self._blocks[url] = blocks + + def _check_feasibility(self): + """ + Check whether target_overlap can be guaranteed given pool geometry. + + By the pigeonhole principle, any two blocks of size k drawn from a + pool of n images must share at least max(0, 2k - n) images. + So if 2*images_per_user - pool_size < target_overlap, it is + mathematically impossible to guarantee the constraint for all pairs. + """ + if self.target_overlap <= 0: + return + for url, cfg in self.sites.items(): + n, k, t = cfg.total_images, cfg.images_per_user, self.target_overlap + guaranteed = max(0, 2 * k - n) + if guaranteed < t: + needed_k = (n + t + 1) // 2 # smallest k s.t. 2k-n >= t + msg = ( + f"FEASIBILITY WARNING [{url.split('/')[-1]}]: " + f"pool={n}, images_per_user={k} → guaranteed min overlap={guaranteed} " + f"(pigeonhole), but target_overlap={t}. " + f"The constraint CANNOT be met for all user pairs. " + f"To guarantee it, increase images_per_user to >= {needed_k} " + f"or reduce target_overlap to <= {guaranteed}." + ) + self.warnings.append(msg) + print(msg) + + # ── Assignment logic ────────────────────────────────────────────────────── + + def _score_block( + self, + block: List[int], + site_url: str, + existing_assignments: Dict[str, List[int]], + w_new: float = 1.0, + w_load: float = 0.8, + ) -> float: + """ + Score a candidate block purely on coverage and load balance. + Overlap is handled upstream as a hard filter, not here. 
+ + score = w_new * (images with zero views so far) + + w_load * (-average view count of images in block) + """ + cfg = self.sites[site_url] + view_count: Dict[int, int] = {img: 0 for img in cfg.allowed_images} + for assigned in existing_assignments.values(): + for img in assigned: + if img in view_count: + view_count[img] += 1 + + block_set = set(block) + new_images = sum(1 for img in block_set if view_count[img] == 0) + avg_views = sum(view_count[img] for img in block_set) / len(block_set) + + return w_new * new_images + w_load * (-avg_views) + + def _select_block( + self, + url: str, + existing_assignments: Dict[str, List[int]], + ) -> List[int]: + """ + Two-phase block selection for one site: + + Phase 1 — HARD FILTER + Keep only blocks that share >= target_overlap images with + at least one existing user. + Skipped when there are no existing users yet. + + Phase 2 — SCORE + Among survivors, pick the block with the highest + coverage/load-balance score. + """ + blocks = self._blocks[url] + + if existing_assignments and self.target_overlap > 0: + # Phase 1: hard overlap filter + eligible = [ + b + for b in blocks + if any( + len(set(b) & set(assigned)) >= self.target_overlap + for assigned in existing_assignments.values() + ) + ] + + if not eligible: + # Constraint cannot be satisfied — warn and fall back + msg = ( + f"WARNING [{url}]: no block satisfies target_overlap=" + f"{self.target_overlap} with existing users. " + f"Pool may be too small. Falling back to best available block." + ) + self.warnings.append(msg) + print(msg) + eligible = blocks + else: + # First user — no overlap constraint yet + eligible = blocks + + # Phase 2: score among eligible blocks + return max( + eligible, key=lambda b: self._score_block(b, url, existing_assignments) + ) + + def add_user(self, user_id: str) -> Dict[str, List[int]]: + """ + Assign image blocks to a new user across all sites. 
+ + Returns + ------- + dict : {site_url: [image_indices]} + """ + if user_id in self.assignments: + raise ValueError(f"User '{user_id}' already exists.") + + assignment = {} + for url in self.sites: + existing = { + uid: self.assignments[uid][url] + for uid in self.assignments + if url in self.assignments[uid] + } + best_block = self._select_block(url, existing) + assignment[url] = sorted(best_block) + + self.assignments[user_id] = assignment + return assignment + + def add_users_bulk(self, user_ids: List[str]) -> Dict[str, Dict[str, List[int]]]: + """Add multiple users in sequence.""" + return {uid: self.add_user(uid) for uid in user_ids} + + # ── Statistics ──────────────────────────────────────────────────────────── + + def coverage_stats(self) -> Dict[str, Dict]: + """ + Per-site statistics: + - allowed_pool_size : total usable images + - images_covered : images assigned to ≥1 user + - coverage_pct : % coverage + - avg_views_per_image: average times each image is seen + - avg_pairwise_overlap: mean shared images across all user pairs + - min_pairwise_overlap: worst-case pair (inter-rater minimum) + - max_pairwise_overlap: best-case pair + - pairs_below_target : number of user pairs sharing < target_overlap + """ + stats = {} + for url, cfg in self.sites.items(): + all_images = set(cfg.allowed_images) + seen_count: Dict[int, int] = defaultdict(int) + user_sets = [] + + for uid, asgn in self.assignments.items(): + imgs = set(asgn.get(url, [])) + user_sets.append(imgs) + for img in imgs: + seen_count[img] += 1 + + covered = sum(1 for img in all_images if seen_count[img] > 0) + avg_views = sum(seen_count.values()) / len(all_images) if all_images else 0 + + overlaps = [ + len(user_sets[i] & user_sets[j]) + for i in range(len(user_sets)) + for j in range(i + 1, len(user_sets)) + ] + avg_overlap = sum(overlaps) / len(overlaps) if overlaps else 0 + pairs_below = sum(1 for o in overlaps if o < self.target_overlap) + + stats[url] = { + "allowed_pool_size": 
cfg.total_images, + "allowed_images": cfg.allowed_images, + "images_per_user": cfg.images_per_user, + "target_overlap": self.target_overlap, + "users_assigned": len(user_sets), + "images_covered": covered, + "coverage_pct": round(100 * covered / cfg.total_images, 1), + "avg_views_per_image": round(avg_views, 2), + "avg_pairwise_overlap": round(avg_overlap, 2), + "min_pairwise_overlap": min(overlaps) if overlaps else 0, + "max_pairwise_overlap": max(overlaps) if overlaps else 0, + "pairs_below_target": pairs_below, + "total_pairs": len(overlaps), + } + return stats + + def overlap_matrix(self, site_url: str) -> Tuple[List[str], List[List[int]]]: + """Return the pairwise overlap matrix for a given site.""" + users = list(self.assignments.keys()) + matrix = [ + [ + len( + set(self.assignments[u1].get(site_url, [])) + & set(self.assignments[u2].get(site_url, [])) + ) + for u2 in users + ] + for u1 in users + ] + return users, matrix + + # ── I/O ────────────────────────────────────────────────────────────────── + + def to_json(self, path: str): + """Save assignments + stats to JSON.""" + data = { + "target_overlap": self.target_overlap, + "assignments": self.assignments, + "stats": self.coverage_stats(), + "warnings": self.warnings, + } + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + print(f"Saved JSON → {path}") + + def to_xlsx(self, path: str): + """ + Export assignments to Excel: + Sheet 'assignments' : one row per user, columns grouped by site + Sheet 'stats' : coverage/overlap statistics per site + Sheet 'overlap_matrices': colour-coded pairwise overlap per site + """ + wb = openpyxl.Workbook() + + site_fills = [ + PatternFill("solid", fgColor="D6E4F7"), + PatternFill("solid", fgColor="E2F0D9"), + PatternFill("solid", fgColor="FCE4D6"), + PatternFill("solid", fgColor="FFF2CC"), + PatternFill("solid", fgColor="F2E0FA"), + ] + header_font = Font(bold=True, color="FFFFFF") + bold_font = Font(bold=True) + center = 
Alignment(horizontal="center", vertical="center", wrap_text=True) + thin = Side(style="thin", color="AAAAAA") + border = Border(left=thin, right=thin, top=thin, bottom=thin) + + def sc(cell, fill=None, font=None, align=None): + if fill: + cell.fill = fill + if font: + cell.font = font + if align: + cell.alignment = align + cell.border = border + + blue = PatternFill("solid", fgColor="2F5496") + + # ── Sheet 1: assignments ────────────────────────────────────────────── + ws = wb.active + ws.title = "assignments" + urls = list(self.sites.keys()) + + ws.cell(1, 1, "User ID") + sc(ws.cell(1, 1), fill=blue, font=header_font, align=center) + ws.column_dimensions["A"].width = 16 + + col = 2 + site_start_cols: Dict[str, int] = {} + for idx, url in enumerate(urls): + k = self.sites[url].images_per_user + site_start_cols[url] = col + ws.merge_cells( + start_row=1, start_column=col, end_row=1, end_column=col + k - 1 + ) + c = ws.cell(1, col, f"pagina: {url}") + fill = site_fills[idx % len(site_fills)] + sc(c, fill=fill, font=bold_font, align=center) + for j in range(k): + ws.column_dimensions[get_column_letter(col + j)].width = 6 + sc(ws.cell(1, col + j), fill=fill, font=bold_font, align=center) + col += k + + ws.cell(2, 1, "# image to assess") + sc(ws.cell(2, 1), font=bold_font, align=center) + for url in urls: + k = self.sites[url].images_per_user + c = ws.cell(2, site_start_cols[url], "# image to assess") + sc(c, font=bold_font, align=center) + ws.merge_cells( + start_row=2, + start_column=site_start_cols[url], + end_row=2, + end_column=site_start_cols[url] + k - 1, + ) + + for row_idx, (uid, asgn) in enumerate(self.assignments.items(), start=3): + ws.cell(row_idx, 1, uid) + sc(ws.cell(row_idx, 1), align=center) + for site_idx, url in enumerate(urls): + fill = site_fills[site_idx % len(site_fills)] + for j, img in enumerate(asgn.get(url, [])): + c = ws.cell(row_idx, site_start_cols[url] + j, img) + sc(c, fill=fill, align=center) + + ws.freeze_panes = "B3" + + # ── Sheet 2: 
stats ──────────────────────────────────────────────────── + ws2 = wb.create_sheet("stats") + stat_headers = [ + "Site", + "Allowed Pool", + "Images/User", + "Target Overlap", + "Users", + "Covered", + "Coverage %", + "Avg Views/Image", + "Avg Pair Overlap", + "Min Pair Overlap", + "Max Pair Overlap", + "Pairs Below Target", + "Total Pairs", + ] + for col_i, h in enumerate(stat_headers, 1): + c = ws2.cell(1, col_i, h) + sc(c, fill=blue, font=header_font, align=center) + ws2.column_dimensions[get_column_letter(col_i)].width = 18 + + stats = self.coverage_stats() + for row_i, (url, s) in enumerate(stats.items(), 2): + vals = [ + url, + s["allowed_pool_size"], + s["images_per_user"], + s["target_overlap"], + s["users_assigned"], + s["images_covered"], + s["coverage_pct"], + s["avg_views_per_image"], + s["avg_pairwise_overlap"], + s["min_pairwise_overlap"], + s["max_pairwise_overlap"], + s["pairs_below_target"], + s["total_pairs"], + ] + for col_i, v in enumerate(vals, 1): + c = ws2.cell(row_i, col_i, v) + # Highlight pairs_below_target in red if > 0 + if col_i == 12 and isinstance(v, int) and v > 0: + sc(c, fill=PatternFill("solid", fgColor="FFCCCC"), align=center) + else: + sc(c, align=center) + + ws2.freeze_panes = "A2" + + # ── Sheet 3: overlap matrices ───────────────────────────────────────── + ws3 = wb.create_sheet("overlap_matrices") + current_row = 1 + for url in urls: + users_list, matrix = self.overlap_matrix(url) + ws3.cell(current_row, 1, f"Overlap matrix — {url}") + ws3.cell(current_row, 1).font = Font(bold=True, size=11) + ws3.cell( + current_row, + len(users_list) + 2, + f"target_overlap = {self.target_overlap} " + f"(cells in red = pairs below target)", + ) + ws3.cell(current_row, len(users_list) + 2).font = Font( + italic=True, color="AA0000" + ) + current_row += 1 + + ws3.cell(current_row, 1, "User \\ User") + sc(ws3.cell(current_row, 1), fill=blue, font=header_font, align=center) + for j, u in enumerate(users_list, 2): + c = ws3.cell(current_row, j, 
u) + sc(c, fill=blue, font=header_font, align=center) + ws3.column_dimensions[get_column_letter(j)].width = 12 + ws3.column_dimensions["A"].width = 16 + current_row += 1 + + max_val = max((max(r) for r in matrix), default=1) + for i, (u, row_data) in enumerate(zip(users_list, matrix)): + sc( + ws3.cell(current_row, 1, u), + fill=blue, + font=header_font, + align=center, + ) + for j, val in enumerate(row_data, 2): + if i == j - 1: # diagonal + fill_hex = "DDDDDD" + elif val < self.target_overlap: # below target → red + fill_hex = "FFCCCC" + else: # gradient blue + intensity = int(200 - 150 * (val / max_val)) if max_val else 200 + fill_hex = f"{intensity:02X}{intensity:02X}FF" + c = ws3.cell(current_row, j, val) + sc(c, fill=PatternFill("solid", fgColor=fill_hex), align=center) + current_row += 1 + current_row += 2 + + wb.save(path) + print(f"Saved XLSX → {path}") + + @classmethod + def from_xlsx( + cls, path: str, images_per_user: int = 6, target_overlap: int = 2 + ) -> "AssignmentSystem": + """Load an existing assignment file (same format as exported).""" + wb = openpyxl.load_workbook(path) + ws = wb["assignments"] + rows = list(ws.iter_rows(values_only=True)) + header_row = rows[0] + + sites_info = [] + current_site = current_start = None + for col_idx, val in enumerate(header_row): + if val and str(val).startswith("pagina:"): + if current_site: + sites_info.append((current_site, current_start, col_idx - 1)) + current_site = val.replace("pagina:", "").strip() + current_start = col_idx + if current_site: + sites_info.append((current_site, current_start, len(header_row) - 1)) + + allowed_imgs: Dict[str, set] = defaultdict(set) + for row in rows[2:]: + if row[0] is None: + continue + for site, start, end in sites_info: + for v in row[start : end + 1]: + if isinstance(v, int): + allowed_imgs[site].add(v) + + site_configs = [ + SiteConfig( + url=s, + allowed_images=sorted(allowed_imgs[s]), + images_per_user=images_per_user, + ) + for s, _, _ in sites_info + ] + system 
= cls(sites=site_configs, target_overlap=target_overlap) + + for row in rows[2:]: + if row[0] is None: + continue + uid = str(row[0]) + asgn = {} + for site, start, end in sites_info: + imgs = sorted([v for v in row[start : end + 1] if isinstance(v, int)]) + asgn[site] = imgs + system.assignments[uid] = asgn + + return system + + +# ───────────────────────────────────────────────────────────────────────────── +# CLI helpers +# ───────────────────────────────────────────────────────────────────────────── + + +def print_summary(system: AssignmentSystem): + print("\n" + "=" * 70) + print("ASSIGNMENT SUMMARY") + print("=" * 70) + for uid, asgn in system.assignments.items(): + print(f"\n User: {uid}") + for url, imgs in asgn.items(): + print(f" {url.split('/')[-1]:<30} → images {imgs}") + + print("\n" + "=" * 70) + print(f"COVERAGE & OVERLAP STATISTICS (target_overlap = {system.target_overlap})") + print("=" * 70) + for url, s in system.coverage_stats().items(): + short = url.split("/")[-1] + flag = " ✗ CONSTRAINT VIOLATED" if s["pairs_below_target"] > 0 else " ✓" + print(f"\n Site: {short}") + print( + f" Images covered : {s['images_covered']}/{s['allowed_pool_size']} ({s['coverage_pct']}%)" + ) + print(f" Avg views/image : {s['avg_views_per_image']}") + print( + f" Avg pairwise overlap: {s['avg_pairwise_overlap']} " + f"(min={s['min_pairwise_overlap']}, max={s['max_pairwise_overlap']})" + ) + print( + f" Pairs below target : {s['pairs_below_target']}/{s['total_pairs']}{flag}" + ) + + if system.warnings: + print("\n" + "=" * 70) + print("WARNINGS") + print("=" * 70) + for w in system.warnings: + print(f" {w}") + + +def build_from_example_file( + input_path: str, n_new_users: int = 5, target_overlap: int = 2 +) -> AssignmentSystem: + """Seed from the reference Excel file, then add n_new_users.""" + wb = openpyxl.load_workbook(input_path) + ws = wb["situazione"] + rows = list(ws.iter_rows(values_only=True)) + header_row = rows[0] + + sites_raw = [] + current_site = 
current_start = None + for col_idx, val in enumerate(header_row): + if val and "giove" in str(val): + if current_site: + sites_raw.append((current_site, current_start, col_idx - 1)) + current_site = val.strip() + current_start = col_idx + if current_site: + sites_raw.append((current_site, current_start, len(header_row) - 2)) + + allowed_imgs: Dict[str, set] = defaultdict(set) + user_rows = [ + r for r in rows[2:] if r[0] is not None and str(r[0]).startswith("user") + ] + for row in user_rows: + for site, start, end in sites_raw: + for v in row[start : end + 1]: + if isinstance(v, int): + allowed_imgs[site].add(v) + + site_configs = [ + SiteConfig(url=s, allowed_images=sorted(allowed_imgs[s]), images_per_user=6) + for s, _, _ in sites_raw + ] + system = AssignmentSystem(sites=site_configs, target_overlap=target_overlap) + + for row in user_rows: + uid = str(row[0]) + asgn = { + site: sorted([v for v in row[start : end + 1] if isinstance(v, int)]) + for site, start, end in sites_raw + } + if any(asgn.values()): + system.assignments[uid] = asgn + + print(f"Loaded {len(system.assignments)} existing users from {input_path}") + + start_idx = len(system.assignments) + 1 + for uid in [f"new_user{i}" for i in range(start_idx, start_idx + n_new_users)]: + system.add_user(uid) + print(f" Added {uid}") + + return system + + +# ───────────────────────────────────────────────────────────────────────────── +# Main +# ───────────────────────────────────────────────────────────────────────────── + + +# ───────────────────────────────────────────────────────────────────────────── +# Round 2+ helper: load a previously exported assignments_output.xlsx +# ───────────────────────────────────────────────────────────────────────────── + + +def _load_previous_assignments(system: AssignmentSystem, path: str) -> AssignmentSystem: + """ + Read a previously exported assignments_output.xlsx (the format produced + by AssignmentSystem.to_xlsx) and seed system.assignments with the existing + user 
rows. The system must already have been constructed with the correct + SiteConfig objects (from --config_json) so the allowed pool is intact. + + Parameters + ---------- + system : AssignmentSystem + Already-constructed system with correct SiteConfig pool. + path : str + Path to the previously exported Excel file. + + Returns + ------- + The same system object, with existing assignments populated. + """ + wb = openpyxl.load_workbook(path) + if "assignments" not in wb.sheetnames: + raise ValueError( + f"Sheet 'assignments' not found in {path}. " + f"Is this a file produced by this script?" + ) + ws = wb["assignments"] + rows = list(ws.iter_rows(values_only=True)) + header_row = rows[0] # site URLs + # row index 1 is the sub-header "# image to assess" — skip it + + # Detect which columns belong to which site + site_urls_in_order = list(system.sites.keys()) + sites_info = [] # (url, col_start, col_end) 0-indexed + current_site = current_start = None + for col_idx, val in enumerate(header_row): + if val and str(val).startswith("pagina:"): + if current_site is not None: + sites_info.append((current_site, current_start, col_idx - 1)) + current_site = val.replace("pagina:", "").strip() + current_start = col_idx + if current_site is not None: + sites_info.append((current_site, current_start, len(header_row) - 1)) + + # Validate that the sites in the file match those in the system + file_urls = [s for s, _, _ in sites_info] + system_urls = list(system.sites.keys()) + if file_urls != system_urls: + raise ValueError( + f"Site URLs in {path} do not match the provided config_json.\n" + f" File : {file_urls}\n" + f" Config : {system_urls}" + ) + + loaded = 0 + for row in rows[2:]: # skip header rows + if row[0] is None: + continue + uid = str(row[0]) + asgn = {} + for site_url, col_start, col_end in sites_info: + imgs = sorted( + [v for v in row[col_start : col_end + 1] if isinstance(v, int)] + ) + if imgs: + asgn[site_url] = imgs + if asgn: + system.assignments[uid] = asgn + 
loaded += 1 + + print(f" Loaded {loaded} existing users from {path}") + return system + + +# ───────────────────────────────────────────────────────────────────────────── +# Main +# ───────────────────────────────────────────────────────────────────────────── + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Alt-Text Validation Assignment System", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +USAGE MODES +----------- +1)Round 1 (fresh start): + python alt_text_assignment.py --config_json sites.json --n_users 5 + +Round 2+ (continue from previous output): + python alt_text_assignment.py \ + --input_assignments assignments_output.xlsx \ + --config_json sites.json \ + --n_users 5 + +2)Seed from reference Excel (situazione format):# the excel file from the user test + python alt_text_assignment.py --input reference.xlsx --n_users 5 + +3)Via --n_images — falls back to the simple contiguous 1..N range demo. + """, + ) + + # ── Input modes ────────────────────────────────────────────────────────── + parser.add_argument( + "--input_assignments", + default=None, + metavar="XLSX", + help=( + "Round 2+: path to a previously exported assignments_output.xlsx. " + "Existing users are loaded as history; new users are added on top. " + "Must be combined with --config_json to restore the full allowed image pool." + ), + ) + parser.add_argument( + "--input", + default=None, + metavar="XLSX", + help="Seed from the original reference Excel (situazione sheet format).", + ) + parser.add_argument( + "--config_json", + default=None, + metavar="JSON", + help=( + 'JSON file: [{"url":"...","allowed_images":[1,3,...],"images_per_user":6},...]. ' + "Required when using --input_assignments." 
+ ), + ) + + # ── Site dimensions (used only in fully synthetic mode) ─────────────────── + parser.add_argument( + "--n_sites", + type=int, + default=5, + help="Number of synthetic sites (demo mode only)", + ) + parser.add_argument( + "--n_images", + type=int, + default=40, + help="Images per synthetic site (contiguous 1..N)", + ) + parser.add_argument( + "--images_per_user", + type=int, + default=6, + help="Images per user per site (overridden by config_json per-site value)", + ) + + # ── Assignment parameters ───────────────────────────────────────────────── + parser.add_argument( + "--target_overlap", + type=int, + default=2, + help="Hard minimum shared images between any two users per site", + ) + parser.add_argument( + "--n_users", + type=int, + default=5, + help="Number of NEW users to assign in this round", + ) + parser.add_argument("--output_xlsx", default="assignments_output.xlsx") + parser.add_argument("--output_json", default="assignments_output.json") + parser.add_argument("--seed", type=int, default=42) + args = parser.parse_args() + + # ── Mode 1: Round 2+ — resume from previous output ─────────────────────── + if args.input_assignments: + if not args.config_json: + parser.error( + "--input_assignments requires --config_json to restore the " + "allowed image pool (it cannot be reliably inferred from " + "assignments alone)." 
+ ) + print(f"\nRound 2+ mode") + print(f" Previous assignments : {args.input_assignments}") + print(f" Site config : {args.config_json}") + + with open(args.config_json, encoding="utf-8") as f: + site_defs = json.load(f) + sites = [ + SiteConfig( + url=sd["url"], + allowed_images=sd["allowed_images"], + images_per_user=sd.get("images_per_user", args.images_per_user), + ) + for sd in site_defs + ] + system = AssignmentSystem( + sites=sites, target_overlap=args.target_overlap, seed=args.seed + ) + system = _load_previous_assignments(system, args.input_assignments) + + existing_count = len(system.assignments) + new_ids = [f"user{existing_count + i}" for i in range(1, args.n_users + 1)] + print(f" Adding {args.n_users} new users: {new_ids[0]} .. {new_ids[-1]}") + for uid in new_ids: + system.add_user(uid) + + # ── Mode 2: Seed from reference Excel (situazione format) ──────────────── + elif args.input and Path(args.input).exists(): + print(f"\nReference Excel mode: {args.input}") + system = build_from_example_file(args.input, args.n_users, args.target_overlap) + + # ── Mode 3: Fresh start from config_json ───────────────────────────────── + elif args.config_json: + print(f"\nRound 1 (fresh): {args.config_json}") + with open(args.config_json, encoding="utf-8") as f: + site_defs = json.load(f) + sites = [ + SiteConfig( + url=sd["url"], + allowed_images=sd["allowed_images"], + images_per_user=sd.get("images_per_user", args.images_per_user), + ) + for sd in site_defs + ] + system = AssignmentSystem( + sites=sites, target_overlap=args.target_overlap, seed=args.seed + ) + system.add_users_bulk([f"user{i}" for i in range(1, args.n_users + 1)]) + + # ── Mode 4: Fully synthetic demo ───────────────────────────────────────── + else: + print(f"\nSynthetic demo: {args.n_sites} sites, images 1..{args.n_images}") + sites = [ + SiteConfig( + url=f"https://example.com/site{i}", + allowed_images=list(range(1, args.n_images + 1)), + images_per_user=args.images_per_user, + ) + for i in 
range(1, args.n_sites + 1) + ] + system = AssignmentSystem( + sites=sites, target_overlap=args.target_overlap, seed=args.seed + ) + system.add_users_bulk([f"user{i}" for i in range(1, args.n_users + 1)]) + + print_summary(system) + system.to_xlsx(args.output_xlsx) + system.to_json(args.output_json) diff --git a/UI/user_task_assignment/sites_config.json b/UI/user_task_assignment/sites_config.json new file mode 100644 index 0000000..f63ec13 --- /dev/null +++ b/UI/user_task_assignment/sites_config.json @@ -0,0 +1,17 @@ +[ + { + "url": "https://giove.isti.cnr.it/users/leonardi/decathlon.html", + "allowed_images": [17, 18, 19 ,20 ,21 ,22 ,23 ,24 ,25 ,26 ,27 ,28 ,29,30, 31, 32,33,34,35, 36, 37, 38, 39, 40 ,41,42, 43 ,44 ,45, 46, 47, 48], + "images_per_user": 6 + }, + { + "url": "https://giove.isti.cnr.it/users/leonardi/etsy.html", + "allowed_images": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30], + "images_per_user": 6 + }, + { + "url": "https://giove.isti.cnr.it/users/manca/amazon_maglioni.html", + "allowed_images": [5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34], + "images_per_user": 6 + } +] diff --git a/UI/user_task_assignment/user_assignment_manager.py b/UI/user_task_assignment/user_assignment_manager.py new file mode 100644 index 0000000..5fa9c5b --- /dev/null +++ b/UI/user_task_assignment/user_assignment_manager.py @@ -0,0 +1,633 @@ +""" +User Assignment Manager +======================= +Manages user-to-assignment mapping with automatic Round 2+ generation. 
+ +This module: +- Maintains assignment data in SQLite database +- Loads existing assignments from JSON file (the exported output of the AssignmentSystem) +- Automatically generates new assignments when user count exceeds current capacity +- Retrieves assignments for registered users + +Usage +----- + manager = UserAssignmentManager( + db_path="wcag_validator_ui.db", + config_json_path="sites_config.json", + assignments_json_path="alt_text_assignments_output_target_overlap.json", + assignments_xlsx_path="alt_text_assignments_output_target_overlap.xlsx" + ) + + # Get assignments for a user (auto-generates if needed) + assignments = manager.get_user_assignments("user123") + + # Register new active users + manager.register_active_users(["user1", "user2", "user3"]) +""" + +import json +import sqlite3 +from pathlib import Path +from typing import Dict, List, Optional, Tuple +from collections import defaultdict + +# Import the assignment system +import sys +from .alt_text_assignment_target_overlap_multiple_round import ( + AssignmentSystem, + SiteConfig, +) + + +class UserAssignmentManager: + """ + Manages user-to-assignment mapping with SQLite persistence. + + Automatically handles Round 2+ mode when user count exceeds current capacity. + """ + + def __init__( + self, + db_path: str, + config_json_path: str, + assignments_json_path: str = "alt_text_assignments_output_target_overlap.json", + assignments_xlsx_path: str = "alt_text_assignments_output_target_overlap.xlsx", + target_overlap: int = 2, + seed: int = 42, + ): + """ + Initialize the User Assignment Manager. 
+ + Parameters + ---------- + db_path : str + Path to SQLite database file + config_json_path : str + Path to sites configuration JSON file (from --config_json) + assignments_json_path : str + Path to output assignments JSON file + assignments_xlsx_path : str + Path to output assignments XLSX file + target_overlap : int + Minimum overlap between user image assignments + seed : int + Random seed for reproducibility + """ + self.db_path = Path(db_path) + self.config_json_path = Path(config_json_path) + self.assignments_json_path = Path(assignments_json_path) + self.assignments_xlsx_path = Path(assignments_xlsx_path) + self.target_overlap = target_overlap + self.seed = seed + + # Load configuration + self.sites_config = self._load_sites_config() + + # Initialize database + self._init_database() + + # Load existing assignments from JSON if available + self._load_existing_assignments() + + def _load_sites_config(self) -> List[SiteConfig]: + """Load site configuration from JSON.""" + with open(self.config_json_path, "r", encoding="utf-8") as f: + site_defs = json.load(f) + + return [ + SiteConfig( + url=sd["url"], + allowed_images=sd["allowed_images"], + images_per_user=sd.get("images_per_user", 6), + ) + for sd in site_defs + ] + + def _init_database(self): + """Initialize SQLite database with user_assignments table.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + # Create user_assignments table + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS user_assignments ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id TEXT NOT NULL, + site_url TEXT NOT NULL, + image_indices TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE(user_id, site_url) + ) + """ + ) + + # Create index for fast user lookups + cursor.execute( + """ + CREATE INDEX IF NOT EXISTS idx_user_assignments_user_id + ON user_assignments(user_id) + """ + ) + + # Create assignment_generation_log table to track Round 2+ 
runs + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS assignment_generation_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + generation_round INTEGER NOT NULL, + users_before INTEGER NOT NULL, + users_after INTEGER NOT NULL, + new_users_added INTEGER NOT NULL, + generated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + json_file TEXT, + xlsx_file TEXT + ) + """ + ) + + # Create table to map user_id and user_name + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS user_info ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id TEXT NOT NULL UNIQUE, + user_name TEXT + ) """ + ) + + conn.commit() + conn.close() + + def _load_existing_assignments(self, active_user_names: Optional[List[str]] = None): + """Load existing assignments from JSON file into database if not already there.""" + if not self.assignments_json_path.exists(): + return + + with open(self.assignments_json_path, "r", encoding="utf-8") as f: + data = json.load(f) + + assignments = data.get("assignments", {}) + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + for user_id, sites_dict in assignments.items(): + for site_url, image_indices in sites_dict.items(): + # print(f"[DB] Loading assignment for user {user_id}, site {site_url}, " + # f"{image_indices} images") + try: + ''' + cursor.execute(""" + INSERT OR IGNORE INTO user_assignments + (user_id, site_url, image_indices) + VALUES (?, ?, ?) + """, (user_id, site_url, json.dumps(image_indices)))''' + + cursor.execute( + """ + INSERT INTO user_assignments (user_id, site_url, image_indices) + VALUES (?, ?, ?) + ON CONFLICT(user_id, site_url) DO UPDATE SET + image_indices = excluded.image_indices, + updated_at = CURRENT_TIMESTAMP + """, + (user_id, site_url, json.dumps(image_indices)), + ) + + cursor.execute( # also update user_info table with user_name if active_user_names is provided and user_id starts with "user" + """ + INSERT INTO user_info (user_id, user_name) + VALUES (?, ?) 
+ ON CONFLICT(user_id) DO UPDATE SET + user_name = excluded.user_name + """, + ( + user_id, + ( + active_user_names[int(user_id[4:]) - 1] + if active_user_names and user_id.startswith("user") + else None + ), + ), + ) + + except sqlite3.IntegrityError: + print( + f"[DB] Error. Skipping existing assignment for user {user_id}, site {site_url}" + ) + pass + + conn.commit() + conn.close() + + def get_user_assignments( + self, user_id: str, from_user_name: bool = False + ) -> Optional[Dict[str, List[int]]]: + """ + Get assignments for a user from database. + + Parameters + ---------- + user_id : str + User ID + from_user_name : bool + If True, treat user_id as user_name and look up corresponding user_id in user_info table before fetching assignments + Returns + ------- + dict or None + {site_url: [image_indices]} or None if user not found + """ + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + if from_user_name: + print(f"[DB] Looking up user_id for user_name: {user_id}") + + cursor.execute( + """ + SELECT user_id + FROM user_info + WHERE user_name = ? + """, + (user_id,), + ) + result = cursor.fetchone() + print(f"[DB] Lookup result for user_name '{user_id}': {result}") + if not result: + conn.close() + return None + user_id = result[0] + + cursor.execute( + """ + SELECT site_url, image_indices + FROM user_assignments + WHERE user_id = ? 
+ """, + (user_id,), + ) + + rows = cursor.fetchall() + conn.close() + + if not rows: + return None + + assignments = {} + for site_url, image_indices_json in rows: + assignments[site_url] = json.loads(image_indices_json) + + return assignments + + def get_all_user_ids(self) -> List[str]: + """Get all registered user IDs from database.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute( + """ + SELECT DISTINCT user_id + FROM user_assignments + ORDER BY user_id + """ + ) + + user_ids = [row[0] for row in cursor.fetchall()] + conn.close() + + return user_ids + + def register_active_users( + self, active_user_ids: List[str] + ) -> Dict[str, Dict[str, List[int]]]: + """ + Register active users and ensure assignments exist for all. + + If user count exceeds current capacity: + 1. Calls get_managed_user_count() to check capacity + 2. Runs AssignmentSystem in Round 2+ mode if needed + 3. Updates JSON/XLSX files + 4. Logs the generation event + + Parameters + ---------- + active_user_ids : list of str + List of currently active user IDs + + Returns + ------- + dict + {user_id: {site_url: [image_indices]}} for all active users + """ + managed_count = self.get_managed_user_count() + new_user_count = len(active_user_ids) + + active_user_names = active_user_ids + print(f"\n[UserAssignmentManager] active_user_name: {active_user_names}") + + # Check if we need to generate new assignments + if new_user_count > managed_count: + num_new_users = new_user_count - managed_count + print(f"\n[UserAssignmentManager] Expanding assignments:") + print(f" Current capacity: {managed_count} users") + print(f" Required capacity: {new_user_count} users") + print(f" Generating {num_new_users} additional assignments...\n") + + self._generate_round2_assignments(num_new_users, active_user_names) + + # Retrieve assignments for all active users + result = {} + for user_id in active_user_ids: + assignments = self.get_user_assignments(user_id) + if assignments is None: 
+ print(f"[WARNING] No assignments found for user {user_id}. It is fine") + else: + result[user_id] = assignments + + return result + + def get_managed_user_count(self) -> int: + """ + Get the number of users currently managed by assignments. + + Returns + ------- + int + Number of unique users with assignments + """ + return len(self.get_all_user_ids()) + + def _generate_round2_assignments( + self, num_new_users: int, active_user_names: List[str] + ): + """ + Generate Round 2+ assignments using AssignmentSystem. + + Parameters + ---------- + num_new_users : int + Number of new users to add + """ + current_users = self.get_managed_user_count() + + # Create AssignmentSystem with current site configuration + system = AssignmentSystem( + sites=self.sites_config, + target_overlap=self.target_overlap, + seed=self.seed, + ) + + # Load existing assignments + system = self._load_previous_assignments_into_system(system) + + # Generate new users + new_user_ids = [f"user{current_users + i}" for i in range(1, num_new_users + 1)] + new_user_names = active_user_names[ + current_users : current_users + num_new_users + ] + + print( + f"[AssignmentSystem] Adding users: {new_user_ids[0]} to {new_user_ids[-1]}" + ) + print( + f"[AssignmentSystem] Corresponding names: {new_user_names[0]} to {new_user_names[-1]}" + ) + + for uid in new_user_ids: + system.add_user(uid) + + # Save updated assignments + print(f"[AssignmentSystem] Saving to {self.assignments_json_path}") + system.to_json(str(self.assignments_json_path)) + + print(f"[AssignmentSystem] Saving to {self.assignments_xlsx_path}") + system.to_xlsx(str(self.assignments_xlsx_path)) + + # Load new assignments into database + self._load_existing_assignments( + active_user_names=active_user_names + ) # pass active_user_names (entire list) to update user_info table with names + + # Log the generation event + self._log_generation_event( + generation_round=self._get_generation_round() + 1, + users_before=current_users, + 
users_after=current_users + num_new_users, + new_users_added=num_new_users, + json_file=str(self.assignments_json_path), + xlsx_file=str(self.assignments_xlsx_path), + ) + + print(f"[UserAssignmentManager] Assignments updated successfully") + + def _load_previous_assignments_into_system( + self, system: AssignmentSystem + ) -> AssignmentSystem: + """ + Load previously exported assignments into an AssignmentSystem object. + + Parameters + ---------- + system : AssignmentSystem + Already-constructed system with correct SiteConfig pool + + Returns + ------- + AssignmentSystem + System with existing assignments populated + """ + if not self.assignments_xlsx_path.exists(): + print( + f"[AssignmentSystem] No previous assignments found at {self.assignments_xlsx_path}" + ) + return system + + print( + f"[AssignmentSystem] Loading previous assignments from {self.assignments_xlsx_path}" + ) + + # Use the _load_previous_assignments function from the module + from .alt_text_assignment_target_overlap_multiple_round import ( + _load_previous_assignments, + ) + + return _load_previous_assignments(system, str(self.assignments_xlsx_path)) + + def _get_generation_round(self) -> int: + """Get the current generation round from log.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute( + """ + SELECT MAX(generation_round) + FROM assignment_generation_log + """ + ) + + result = cursor.fetchone()[0] + conn.close() + + return result if result is not None else 0 + + def _log_generation_event( + self, + generation_round: int, + users_before: int, + users_after: int, + new_users_added: int, + json_file: str, + xlsx_file: str, + ): + """Log a generation event to the database.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute( + """ + INSERT INTO assignment_generation_log + (generation_round, users_before, users_after, new_users_added, json_file, xlsx_file) + VALUES (?, ?, ?, ?, ?, ?) 
+ """, + ( + generation_round, + users_before, + users_after, + new_users_added, + json_file, + xlsx_file, + ), + ) + + conn.commit() + conn.close() + + print( + f"[DB] Logged generation event: Round {generation_round}, " + f"{users_before}→{users_after} users" + ) + + def get_generation_history(self) -> List[Dict]: + """Get the complete assignment generation history.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute( + """ + SELECT generation_round, users_before, users_after, new_users_added, + generated_at, json_file, xlsx_file + FROM assignment_generation_log + ORDER BY generation_round ASC + """ + ) + + columns = [desc[0] for desc in cursor.description] + history = [dict(zip(columns, row)) for row in cursor.fetchall()] + conn.close() + + return history + + def get_statistics(self) -> Dict: + """Get statistics about user assignments.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + # Total unique users + cursor.execute("SELECT COUNT(DISTINCT user_id) FROM user_assignments") + total_users = cursor.fetchone()[0] + + # Users per site + cursor.execute( + """ + SELECT site_url, COUNT(DISTINCT user_id) as user_count + FROM user_assignments + GROUP BY site_url + ORDER BY site_url + """ + ) + + users_per_site = {row[0]: row[1] for row in cursor.fetchall()} + + # Average images per user per site + cursor.execute( + """ + SELECT site_url, + AVG(json_array_length(image_indices)) as avg_images + FROM user_assignments + GROUP BY site_url + ORDER BY site_url + """ + ) + + avg_images_per_site = {} + for row in cursor.fetchall(): + try: + # Fallback for older SQLite versions + avg_images_per_site[row[0]] = row[1] + except (TypeError, IndexError): + pass + + conn.close() + + return { + "total_users": total_users, + "users_per_site": users_per_site, + "avg_images_per_site": avg_images_per_site, + } + + +if __name__ == "__main__": + """Demo/test usage""" + import sys + + # Set paths (adjust as needed for your environment) + 
base_dir = Path(__file__).parent + + manager = UserAssignmentManager( + db_path=str( + Path(__file__).parent.parent / "persistence" / "wcag_validator_ui.db" + ), + config_json_path=str(base_dir / "sites_config.json"), + assignments_json_path=str( + base_dir / "alt_text_assignments_output_target_overlap.json" + ), + assignments_xlsx_path=str( + base_dir / "alt_text_assignments_output_target_overlap.xlsx" + ), + ) + + print("\n=== User Assignment Manager Demo ===\n") + + # Get current managed users + managed_users = manager.get_all_user_ids() + print(f"Currently managed users: {managed_users}") + print(f"Total managed users: {manager.get_managed_user_count()}\n") + + # Define active users (including new ones) + active_users = [f"user{i}" for i in range(1, 8)] + print(f"Active users (including new): {active_users}\n") + + # Register and get assignments + print("Registering active users...") + assignments = manager.register_active_users(active_users) + + print(f"\nAssignments for {len(assignments)} users:") + for user_id in sorted(assignments.keys())[:3]: # Show first 3 + print(f" {user_id}: {len(assignments[user_id])} sites") + + # Get statistics + stats = manager.get_statistics() + print(f"\n=== Statistics ===") + print(f"Total users: {stats['total_users']}") + print(f"Users per site: {stats['users_per_site']}") + + # Get history + history = manager.get_generation_history() + if history: + print(f"\n=== Generation History ===") + for event in history[-3:]: + print( + f" Round {event['generation_round']}: {event['users_after']} users " + f"({event['new_users_added']} new)" + ) diff --git a/UI/wcag_validator_ui.py b/UI/wcag_validator_ui.py index 877db24..159d4b6 100644 --- a/UI/wcag_validator_ui.py +++ b/UI/wcag_validator_ui.py @@ -28,10 +28,59 @@ import urllib.parse import os import sqlite3 + + +from user_task_assignment.user_assignment_manager import UserAssignmentManager + +user_assignment_manager = UserAssignmentManager( + 
db_path="persistence/wcag_validator_ui.db", + config_json_path="user_task_assignment/sites_config.json", + assignments_json_path="user_task_assignment/alt_text_assignments_output_target_overlap.json", + assignments_xlsx_path="user_task_assignment/alt_text_assignments_output_target_overlap.xlsx" +) + +# Get current managed users +managed_users = user_assignment_manager.get_all_user_ids() +print(f"Currently managed users from db: {managed_users}") +print(f"Total managed users from db: {user_assignment_manager.get_managed_user_count()}\n") + +user_assignment_stats = user_assignment_manager.get_statistics() +print(f"Current assignment stats:{user_assignment_stats} \n") + + WCAG_VALIDATOR_RESTSERVER_HEADERS = [("Content-Type", "application/json")] +def display_user_assignment(user_state): + if user_state and "username" in user_state: + username = user_state["username"] + print(f"Fetching assignment for user: {username}") + assignments = user_assignment_manager.get_user_assignments(username, from_user_name=True) + + if assignments is not None: + print (f"Your current assignment: {assignments}") + + else: + #return "No assignments found for you. Please contact the administrator." + return pd.DataFrame() + data_frame = [] + for url in assignments : + #print(f"URL: {url}, Assigned Image List: {assignments[url]}") + data_frame.append( + { + "Website URL": url, + "Assigned Image Number List": assignments[url] + } + ) + + df = pd.DataFrame(data_frame) + #print(f"DataFrame to display for user {username}:\n{df}") + return df + else: + #return "User not logged in." 
+ return pd.DataFrame() + def process_dataframe(db_path, url, updated_df, user_state={},llm_response_output={}): print("Processing dataframe to adjust columns...type:",type(updated_df)) @@ -632,6 +681,7 @@ with gr.Blocks(theme=gr.themes.Glass(), title="WCAG AI Validator") as demo: llm_response_output = gr.State() alttext_popup_html_state = gr.State("") + user_assignment_manager_state = gr.State(value=user_assignment_manager) with Modal(visible=False, allow_user_close=False) as alttext_modal: gr.Markdown("## Alt Text LLMs Assessment Results") @@ -668,7 +718,7 @@ with gr.Blocks(theme=gr.themes.Glass(), title="WCAG AI Validator") as demo: # login section user_state = gr.State({"logged_in": False, "username": None}) - with gr.Accordion(label="Register & Login", open=True) as register_and_login: + with gr.Accordion(label="Your Info", open=True) as register_and_login: with gr.Column(visible=True) as login_section: gr.Markdown("## Login / Register") @@ -706,6 +756,19 @@ with gr.Blocks(theme=gr.themes.Glass(), title="WCAG AI Validator") as demo: content_display = gr.Textbox( label="Your account", lines=5, interactive=False ) + + user_assignment_status = gr.DataFrame( + headers=[ + "Website URL", + "Assigned Image Number List" + #"Assignment Status", + ], + label="Your Current Assignment", + wrap=True, # Wrap text in cells + interactive=False, + scale=7, + ) + logout_btn = gr.Button("Logout", variant="stop") # end login section @@ -724,7 +787,7 @@ with gr.Blocks(theme=gr.themes.Glass(), title="WCAG AI Validator") as demo: value=url_list[0], multiselect=False, label="Select an URL", - info="Select an URL to load in iframe", + info="Select an URL to load", ) images_number = gr.Slider( 5, @@ -898,11 +961,11 @@ with gr.Blocks(theme=gr.themes.Glass(), title="WCAG AI Validator") as demo: alttext_assessment, register_and_login, ], - ).then(fn=protected_content, inputs=[user_state], outputs=[content_display]) + ).then(fn=protected_content, inputs=[user_state], 
outputs=[content_display]).then(fn=display_user_assignment, inputs=[user_state], outputs=[user_assignment_status]) reg_btn.click( fn=register_user, - inputs=[reg_username, reg_password, reg_confirm], + inputs=[reg_username, reg_password, reg_confirm,user_assignment_manager_state], outputs=[login_msg, reg_msg, user_state], ) diff --git a/UI/wcag_validator_ui_pre_user_task_assig.py b/UI/wcag_validator_ui_pre_user_task_assig.py new file mode 100644 index 0000000..877db24 --- /dev/null +++ b/UI/wcag_validator_ui_pre_user_task_assig.py @@ -0,0 +1,923 @@ +#### To launch the script +# gradio wcag_validator_ui.py +# python wcag_validator_ui.py + +import gradio as gr +from gradio_modal import Modal +import requests +from pathlib import Path +import sys +import pandas as pd + +parent_dir = Path(__file__).parent.parent +sys.path.insert(0, str(parent_dir)) +from dotenv import load_dotenv, find_dotenv +from dependences.utils import ( + call_API_urlibrequest, + create_folder, + db_persistence_startup, + db_persistence_insert, + return_from_env_valid, +) +from dependences_ui.utils import * +import logging +import time +import json +import urllib.request +import urllib.parse +import os +import sqlite3 + + +WCAG_VALIDATOR_RESTSERVER_HEADERS = [("Content-Type", "application/json")] + + +def process_dataframe(db_path, url, updated_df, user_state={},llm_response_output={}): + + print("Processing dataframe to adjust columns...type:",type(updated_df)) + # accept different input forms from UI (DataFrame, JSON string, or list of dicts) + try: + + if isinstance(updated_df, str): + try: + updated_df = pd.read_json(updated_df, orient="records") + except Exception: + updated_df = pd.read_json(updated_df) + elif isinstance(updated_df, list): + updated_df = pd.DataFrame(updated_df) + except Exception as e: + return f"Error parsing updated data: {str(e)}" + for column_rating_name in ["User Assessment for LLM Proposal 1", "User Assessment for LLM Proposal 2"]: + + # Get the assessment column + 
try: + updated_df[column_rating_name] = updated_df[column_rating_name].astype(int) + except ValueError: + return "Error: User Assessment for LLM Proposal must be an integer" + except KeyError: + return f"No data Saved because no image selected. Please select at least one image." + except Exception as e: + return f"Error processing User Assessment for LLM Proposal: {str(e)}" + + if (updated_df[column_rating_name] < 1).any() or ( + updated_df[column_rating_name] > 5 + ).any(): + return "Error: User Assessment for LLM Proposal must be between 1 and 5" + + dataframe_json = updated_df.to_json(orient="records") + connection_db = sqlite3.connect(db_path) + json_user_str = json.dumps({"username": user_state["username"]}, ensure_ascii=False) + + llm_response_output_str = json.dumps(llm_response_output, ensure_ascii=False) #recuperato dalla chiamata all'llm, ho tutte le info anche sulle immagini + try: + # insert after everything to keep datetime aligned + db_persistence_insert( + connection_db=connection_db, + insert_type="wcag_user_llm_alttext_assessments", + page_url=url, + user=json_user_str, + llm_model="", + json_in_str=llm_response_output_str,#dataframe_json, # to improve + json_out_str=dataframe_json, + table="wcag_user_assessments", + ) + except Exception as e: + print("Error inserting user assessment into database:", str(e)) + finally: + if connection_db: + connection_db.close() + return "User assessment saved successfully!" 
+ + +def load_images_from_json(json_input): + """Extract URLs and alt text from JSON and create HTML gallery""" + try: + data = json_input + + if "images" not in data or not data["images"]: + return "No images found in JSON", "" + + images = data["images"] + info_text = f"Found {len(images)} image(s)" + + + # Create HTML gallery with checkboxes and assessment forms + html = """ + + " + + return info_text, html + + except json.JSONDecodeError as e: + return f"Error: Invalid JSON format - {str(e)}", "" + except Exception as e: + return f"Error: {str(e)}", "" + + +def load_llm_assessment_from_json(json_input): + + try: + # Parse JSON input + data = json_input + + if "mllm_validations" not in data or not data["mllm_validations"]: + print("no mllm_validations found") + return pd.DataFrame() + + if ( + data["mllm_validations"]["mllm_alttext_assessments"].get("mllm_alttext_assessments_openai") + + and data["mllm_validations"]["mllm_alttext_assessments"].get("mllm_alttext_assessments_local") + + ): + + is_single_model_output = False + info_text = f"Assessment done by {len(data['mllm_validations']['mllm_alttext_assessments'])} models on {len(data['mllm_validations']['mllm_alttext_assessments']['mllm_alttext_assessments_openai'])} image(s)\n\n" + print( + f"The response contains multiple models output. Assessment done by {len(data['mllm_validations']['mllm_alttext_assessments'])} models on {len(data['mllm_validations']['mllm_alttext_assessments']['mllm_alttext_assessments_openai'])} image(s)" + ) + + else: + + is_single_model_output = True + info_text = f"Assessment done on {len(data['mllm_validations']['mllm_alttext_assessments'])} image(s)\n\n" + print( + f"The response contains only one output. 
Assessment done on {len(data['mllm_validations']['mllm_alttext_assessments'])} image(s)" + ) + + data_frame = [] + if is_single_model_output: + for idx, img_data in enumerate( + data["mllm_validations"]["mllm_alttext_assessments"], 1 + ): + + original_alt_text_assessment = img_data["mllm_response"].get( + "original_alt_text_assessment", "No description" + ) + new_alt_text = img_data["mllm_response"].get( + "new_alt_text", "No description" + ) + alt_text_original = img_data.get("alt_text", "No alt_text provided") + + data_frame.append( + { + "Original Alt Text": alt_text_original, + "LLM Assessment": original_alt_text_assessment, + "LLM Proposed Alt Text": new_alt_text, + } + ) + else: + + for idx, img_data in enumerate( + data["mllm_validations"]["mllm_alttext_assessments"]["mllm_alttext_assessments_openai"], 1 + ): + + original_alt_text_assessment = img_data["mllm_response"].get( + "original_alt_text_assessment", "No description" + ) + new_alt_text = img_data["mllm_response"].get( + "new_alt_text", "No description" + ) + alt_text_original = img_data.get("alt_text", "No alt_text provided") + + """data_frame.append( + { + "Original Alt Text": alt_text_original, + "LLM Assessment": original_alt_text_assessment, + "LLM Proposed Alt Text": new_alt_text, + } + )""" + #for idx, img_data in enumerate( + # data["mllm_validations"]["mllm_alttext_assessments"]["mllm_alttext_assessments_local"], 1 + #): + img_data_local = data["mllm_validations"]["mllm_alttext_assessments"]["mllm_alttext_assessments_local"][idx-1] + original_alt_text_assessment_local = img_data_local["mllm_response"].get( + "original_alt_text_assessment", "No description" + ) + new_alt_text_local = img_data_local["mllm_response"].get( + "new_alt_text", "No description" + ) + #alt_text_original = img_data.get("alt_text", "No alt_text provided") + + data_frame.append( + { + "Original Alt Text": alt_text_original, + "LLM Assessment 1": original_alt_text_assessment, + "LLM Proposed Alt Text 1": new_alt_text, + 
"LLM Assessment 2": original_alt_text_assessment_local, + "LLM Proposed Alt Text 2": new_alt_text_local, + } + ) + + df = pd.DataFrame(data_frame) + + return df + + except json.JSONDecodeError as e: + return f"Error: Invalid JSON format - {str(e)}", [] + except Exception as e: + return f"Error: {str(e)}", [] + + +def make_alttext_llm_assessment_api_call( + url, + selected_images_json=[], + db_path=None, + wcag_rest_server_url="http://localhost:8000", + user_state={}, + number_of_images=30, +): + + print( + f"Making API call for llm assessment for {url} to {wcag_rest_server_url}/wcag_alttext_validation" + ) + selected_images = json.loads(selected_images_json) if selected_images_json else [] + # print("selected_images:", selected_images) + + if not selected_images or len(selected_images) == 0: + info_text = "No images selected" + + return "LLM assessment not started", pd.DataFrame(), {} + + # prepare data for insertion + json_in_str = {} + json_out_str = {} + selected_urls = [] + selected_alt_text_original = [] + user_assessments = [] + user_new_alt_texts = [] + selected_image_id = [] + user_assessments_llm_proposal_1 = [] + user_assessments_llm_proposal_2 = [] + + for img in selected_images: + selected_urls.append(img["image_url"]) + selected_alt_text_original.append(img["original_alt_text"]) + user_assessments.append(img["assessment"]) + user_new_alt_texts.append(img["new_alt_text"]) + selected_image_id.append( + int(img["image_index"]) + 1 + ) # add the id selected (+1 for index alignment) + user_assessments_llm_proposal_1.append(3) # default value for now + user_assessments_llm_proposal_2.append(3) # default value for now + json_in_str["images_urls"] = selected_urls + json_in_str["images_alt_text_original"] = selected_alt_text_original + json_out_str["user_assessments"] = user_assessments + json_out_str["user_new_alt_texts"] = user_new_alt_texts + json_in_str = json.dumps(json_in_str, ensure_ascii=False) + json_out_str = json.dumps(json_out_str, 
ensure_ascii=False) + json_user_str = json.dumps({"username": user_state["username"]}, ensure_ascii=False) + connection_db = sqlite3.connect(db_path) + # --------- + + try: + + response = call_API_urlibrequest( + data={ + "page_url": url, + "number_of_images": number_of_images, + "context_levels": 5, + "pixel_distance_threshold": 200, + "save_images": "True", + "save_elaboration": "True", + "specific_images_urls": selected_urls, + }, + url=wcag_rest_server_url + "/wcag_alttext_validation", + headers=WCAG_VALIDATOR_RESTSERVER_HEADERS, + ) + # return response + info_dataframe = load_llm_assessment_from_json(response) + #print("info_dataframe:", info_dataframe) + + # add the UI ids and other fields to to api response + info_dataframe.insert( + 0, "Image #", selected_image_id + ) # add the UI ids from to api response + info_dataframe.insert(2, "User Assessment", user_assessments) + + info_dataframe.insert(3, "User Proposed Alt Text", user_new_alt_texts) + info_dataframe["User Assessment for LLM Proposal 1"] = ( + user_assessments_llm_proposal_1 + ) + info_dataframe["User Assessment for LLM Proposal 2"] = ( + user_assessments_llm_proposal_2 + ) + #print("info_dataframe after adding user assessments:", info_dataframe) + + except Exception as e: + return {"error": str(e)} + + try: + # insert after everything to keep datetime aligned + db_persistence_insert( + connection_db=connection_db, + insert_type="wcag_user_alttext_assessments", + page_url=url, + user=json_user_str, + llm_model="", + json_in_str=json_in_str, + json_out_str=json_out_str, + table="wcag_user_assessments", + ) + except Exception as e: + print("Error inserting user assessment into database:", str(e)) + finally: + if connection_db: + connection_db.close() + return "LLM assessment completed", info_dataframe, response + + +def make_image_extraction_api_call( + url, + number_of_images=30, + wcag_rest_server_url="http://localhost:8000", +): + print( + f"Making API call for image_extraction for {url} to 
{wcag_rest_server_url}/extract_images" + ) + try: + + response = call_API_urlibrequest( + data={ + "page_url": url, + "number_of_images": number_of_images, + }, + url=wcag_rest_server_url + "/extract_images", + headers=WCAG_VALIDATOR_RESTSERVER_HEADERS, + ) + # return response + info_text, gallery_images = load_images_from_json(response) + + return info_text, gallery_images + except Exception as e: + return {"error": str(e)} + + +def render_alttext_form(df): + """Render a pandas DataFrame (or list/dict) into an editable HTML form.""" + try: + if df is None: + return "" + if isinstance(df, str): + df = pd.read_json(df, orient="records") + if isinstance(df, dict): + df = pd.DataFrame(df) + if isinstance(df, list): + df = pd.DataFrame(df) + + html = """ + + + + + + + + + + + + + + + + + + + """ + + for _, row in df.iterrows(): + imgnum = row.get("Image #", "") + orig = row.get("Original Alt Text", "") + user_ass = row.get("User Assessment", "") + user_prop = row.get("User Proposed Alt Text", "") + llm1_ass = row.get("LLM Assessment 1", "") + llm2_ass = row.get("LLM Assessment 2", "") + llm1_prop = row.get("LLM Proposed Alt Text 1", "") + llm2_prop = row.get("LLM Proposed Alt Text 2", "") + + user_llm1_ass = row.get("User Assessment for LLM Proposal 1", 3) + user_llm2_ass = row.get("User Assessment for LLM Proposal 2", 3) + + html += f""" + + + + + + + + + + + + + + """ + + html += """ + +
Image #Original Alt TextUser AssessmentUser Proposed Alt TextLLM Assessment 1LLM Proposed Alt Text 1User Assessment for LLM Proposal 1LLM Assessment 2LLM Proposed Alt Text 2User Assessment for LLM Proposal 2
{imgnum}{orig}{user_ass}{user_prop}{llm1_ass}{llm1_prop} + + {llm2_ass}{llm2_prop} + +
+ """ + + return gr.update(value=html), html + except Exception as e: + return f"Error rendering form: {str(e)}" + + +# ------- Gradio Interface -------# + + +# Create Gradio interface +with gr.Blocks(theme=gr.themes.Glass(), title="WCAG AI Validator") as demo: + + + + gr.HTML(""" + + """) + + + llm_response_output = gr.State() + alttext_popup_html_state = gr.State("") + + with Modal(visible=False, allow_user_close=False) as alttext_modal: + gr.Markdown("## Alt Text LLMs Assessment Results") + gr.Markdown("To assess the LLMs outputs, use the dropdowns to indicate how much you agree with the LLM proposed alt text.") + alttext_modal_content = gr.HTML("") + close_modal_btn = gr.Button("Save Your Assessment", variant="secondary",elem_classes=["close-modal-btn"]) + + env_path = find_dotenv(filename=".env") + if env_path == "": + print("env path not found: service starting with the default params values") + _ = load_dotenv(env_path) # read .env file + db_path = return_from_env_valid("DB_PATH", "persistence/wcag_validator_ui.db") + print("db_path:", db_path) + wcag_rest_server_url = return_from_env_valid( + "WCAG_REST_SERVER_URL", "http://localhost:8000" + ) + + default_urls = [ + "https://amazon.com", + "https://ebay.com", + ] + url_list_str = return_from_env_valid("URL_LIST", json.dumps(default_urls)) + url_list = json.loads(url_list_str) + + print("wcag_rest_server_url:", wcag_rest_server_url) + + connection_db = db_persistence_startup( + db_name_and_path=db_path, table="wcag_user_assessments" + ) + print("Database connection reference available:", connection_db) + connection_db.close() + + gr.Markdown("# WCAG AI Validator UI") + + # login section + user_state = gr.State({"logged_in": False, "username": None}) + with gr.Accordion(label="Register & Login", open=True) as register_and_login: + with gr.Column(visible=True) as login_section: + gr.Markdown("## Login / Register") + + with gr.Tab("Login"): + login_username = gr.Textbox( + label="Username", placeholder="Enter 
your username" + ) + login_password = gr.Textbox( + label="Password", type="password", placeholder="Enter your password" + ) + + login_btn = gr.Button("Login", variant="primary") + login_msg = gr.Textbox(label="Login Status", interactive=False) + + with gr.Tab("Register"): + reg_username = gr.Textbox( + label="Username", placeholder="Choose a username" + ) + reg_password = gr.Textbox( + label="Password", + type="password", + placeholder="Choose a password (min 6 characters)", + ) + reg_confirm = gr.Textbox( + label="Confirm Password", + type="password", + placeholder="Confirm your password", + ) + + reg_btn = gr.Button("Register", variant="primary") + reg_msg = gr.Textbox(label="Registration Status", interactive=True) + + with gr.Column(visible=False) as protected_section: + + content_display = gr.Textbox( + label="Your account", lines=5, interactive=False + ) + logout_btn = gr.Button("Logout", variant="stop") + + # end login section + + with gr.Tab("Alt Text Assessment", visible=False) as alttext_assessment: + + db_path_state = gr.State(value=db_path) # Store path in State\ + wcag_rest_server_url_state = gr.State(value=wcag_rest_server_url) + with gr.Row(): + with gr.Column(): + + with gr.Row(): + with gr.Column(): + url_input = gr.Dropdown( + url_list, + value=url_list[0], + multiselect=False, + label="Select an URL", + info="Select an URL to load in iframe", + ) + images_number = gr.Slider( + 5, + 100, + value=50, + step=5, + label="Max number of images to retrieve", + visible=False, + ) + with gr.Column(): + + image_extraction_api_call_btn = gr.Button( + "Extract Images & Alt Texts", variant="primary" + ) + alttext_api_call_btn = gr.Button( + "Start LLMs Assessment", + variant="secondary", + interactive=False, + ) + image_info_output = gr.Textbox( + label="Activity tracking", lines=1 + ) + + with gr.Row(visible=False) as alttext_results_row: + + # Store the DataFrame in state and render a clear HTML form for user edits + alttext_info_state = gr.State() + 
alttext_form = gr.HTML(label="Assessment Form") + alttext_form_data = gr.JSON(visible=False) + + + with gr.Row(): + gallery_html = gr.HTML(label="Image Gallery") + + + + image_extraction_api_call_btn.click( + fn=lambda: ("", "", gr.update(visible=False), gr.Button(interactive=False)), + inputs=[], + outputs=[ + image_info_output, + gallery_html, + alttext_results_row, + alttext_api_call_btn, + ], + ).then( + make_image_extraction_api_call, + inputs=[url_input, images_number, wcag_rest_server_url_state], + outputs=[image_info_output, gallery_html], + ).then( + fn=lambda: gr.Button(interactive=True), + inputs=[], + outputs=[alttext_api_call_btn], + ) + + alttext_api_call_btn.click( + fn=make_alttext_llm_assessment_api_call, + inputs=[ + url_input, + gallery_html, + db_path_state, + wcag_rest_server_url_state, + user_state, + ], + outputs=[image_info_output, alttext_info_state, llm_response_output], + js=""" + (url_input,gallery_html) => { + const checkboxes = document.querySelectorAll('.image-checkbox:checked'); + if (checkboxes.length === 0) { + alert('Please select at least one image!'); + return [url_input,JSON.stringify([])]; + } + if (checkboxes.length > 3) { + alert('Please select maximum 3 images!'); + return [url_input,JSON.stringify([])]; + } + const selectedData = []; + + checkboxes.forEach(checkbox => { + const index = checkbox.dataset.index; + const imageUrl = checkbox.dataset.imgurl; + const originalAlt = document.querySelector('.original-alt[data-index="' + index + '"]').value; + const assessment = document.querySelector('input[name="assessment-' + index + '"]:checked').value; + const newAltText = document.querySelector('.new-alt-text[data-index="' + index + '"]').value; + + selectedData.push({ + image_index: index, + image_url: imageUrl, + original_alt_text: originalAlt, + assessment: parseInt(assessment), + new_alt_text: newAltText + }); + }); + + return [url_input,JSON.stringify(selectedData)]; + } + """, + ).then( + fn=render_alttext_form, + 
inputs=[alttext_info_state], + outputs=[alttext_form,alttext_popup_html_state], + ).then( + fn=lambda html: (gr.update(value=html), Modal(visible=True)), + inputs=[alttext_popup_html_state], + outputs=[alttext_modal_content, alttext_modal], # ← populate + open modal + ) + + + + close_modal_btn.click( #the close button now save + fn=process_dataframe, + inputs=[db_path_state, url_input, alttext_form_data, user_state,llm_response_output], + outputs=[image_info_output], + js=""" + (db_path_state, url_input, alttext_form_html, user_state, llm_response_output) => { + const rows = document.querySelectorAll('.alttext-row'); + const selectedData = []; + rows.forEach(row => { + const imgNum = row.querySelector('.img-num')?.innerText || ''; + const origAlt = row.querySelector('.orig-alt')?.innerText || ''; + const userAssessment = row.querySelector('.user-assessment')?.innerText || '3'; + const userProposed = row.querySelector('.user-proposed')?.innerText || ''; + const user_llm1_ass = row.querySelector('.user_llm1_ass')?.value || '3'; + const user_llm2_ass = row.querySelector('.user_llm2_ass')?.value || '3'; + + selectedData.push({ + "Image #": imgNum, + "Original Alt Text": origAlt, + "User Assessment": parseInt(userAssessment)||3, + "User Proposed Alt Text": userProposed, + "User Assessment for LLM Proposal 1": parseInt(user_llm1_ass), + "User Assessment for LLM Proposal 2": parseInt(user_llm2_ass) + }); + }); + return [db_path_state, url_input, selectedData, user_state, llm_response_output]; + } + """, + ).then( # Close button dismisses the modal + fn=lambda: Modal(visible=False), + inputs=[], + outputs=[alttext_modal], + js=""" + async () => { + const btn = document.querySelector('.close-modal-btn'); + + // Change button text + btn.textContent = 'Saving...'; + + // Fade out + const modal = document.querySelector('.modal-container'); + modal.style.transition = 'opacity 0.4s ease'; + modal.style.opacity = '0'; + + // Wait for fade + await new Promise(resolve => 
setTimeout(resolve, 400)); + } + """ + ) + + # placed here at the end to give full contents visibility to events + # Event handlers + login_btn.click( + fn=login_user, + inputs=[login_username, login_password, user_state], + outputs=[ + login_msg, + reg_msg, + user_state, + login_section, + protected_section, + alttext_assessment, + register_and_login, + ], + ).then(fn=protected_content, inputs=[user_state], outputs=[content_display]) + + reg_btn.click( + fn=register_user, + inputs=[reg_username, reg_password, reg_confirm], + outputs=[login_msg, reg_msg, user_state], + ) + + logout_btn.click( + fn=logout_user, + inputs=[user_state], + outputs=[ + login_msg, + user_state, + login_section, + protected_section, + alttext_assessment, + ], + ) + + +if __name__ == "__main__": + demo.launch(server_name="0.0.0.0", server_port=7860)