Validate generated HTML asset links
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
ffab6888cb582f29553ee3c5972bdb23b766fa74- Parents
-
2c722c3 - Tree
039a614
ffab688
ffab6888cb582f29553ee3c5972bdb23b766fa742c722c3
039a614| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/finalization.py
|
48 | 13 |
| M |
src/loader/runtime/safeguard_services.py
|
87 | 9 |
| M |
tests/test_finalization.py
|
47 | 0 |
| M |
tests/test_safeguard_services.py
|
44 | 0 |
src/loader/runtime/finalization.pymodified@@ -1218,6 +1218,8 @@ def _build_verification_repair_guidance( | ||
| 1218 | 1218 | |
| 1219 | 1219 | lines = ["Repair focus:"] |
| 1220 | 1220 | lines.extend(f"- {item}" for item in fixes) |
| 1221 | + if quality_targets: | |
| 1222 | + lines.extend(_html_quality_repair_target_lines(quality_targets)) | |
| 1221 | 1223 | primary_target = repair_targets[0] if repair_targets else None |
| 1222 | 1224 | if primary_target is not None: |
| 1223 | 1225 | expected_action = _repair_expected_action_line( |
@@ -1241,6 +1243,16 @@ def _build_verification_repair_guidance( | ||
| 1241 | 1243 | ), |
| 1242 | 1244 | "- Do not reread unrelated reference materials or restart discovery " |
| 1243 | 1245 | "while this concrete repair target is unresolved.", |
| 1246 | + *( | |
| 1247 | + [ | |
| 1248 | + "- After the concrete reference repair passes, continue with the " | |
| 1249 | + "listed content-quality targets; do not declare completion while " | |
| 1250 | + "any listed quality target remains.", | |
| 1251 | + *_html_quality_repair_strategy_lines(), | |
| 1252 | + ] | |
| 1253 | + if quality_targets | |
| 1254 | + else [] | |
| 1255 | + ), | |
| 1244 | 1256 | ] |
| 1245 | 1257 | ) |
| 1246 | 1258 | else: |
@@ -1310,24 +1322,13 @@ def _build_html_quality_repair_guidance( | ||
| 1310 | 1322 | targets: list[VerificationQualityRepairTarget], |
| 1311 | 1323 | ) -> str: |
| 1312 | 1324 | lines = ["Repair focus:"] |
| 1313 | - visible_targets = targets[:_MAX_HTML_QUALITY_REPAIR_TARGETS] | |
| 1314 | - for target in visible_targets: | |
| 1315 | - lines.append(f"- Improve `{target.artifact_path}`: {target.issue}.") | |
| 1316 | - hidden_count = len(targets) - len(visible_targets) | |
| 1317 | - if hidden_count > 0: | |
| 1318 | - lines.append( | |
| 1319 | - f"- {hidden_count} additional generated artifact(s) also failed quality; " | |
| 1320 | - "repair the listed targets first, then rerun verification for the next batch." | |
| 1321 | - ) | |
| 1325 | + lines.extend(_html_quality_repair_target_lines(targets)) | |
| 1322 | 1326 | primary = targets[0] |
| 1323 | 1327 | lines.extend( |
| 1324 | 1328 | [ |
| 1325 | 1329 | f"- Immediate next step: edit `{primary.artifact_path}` with a substantial " |
| 1326 | 1330 | "expansion or replacement that satisfies its listed quality issue.", |
| 1327 | - "- If a target says thin content, add enough concrete prose to comfortably " | |
| 1328 | - "exceed the expected text-character floor; if it says insufficient " | |
| 1329 | - "structured content, add enough real sections, lists, code, tables, or " | |
| 1330 | - "other content blocks to exceed the block floor.", | |
| 1331 | + *_html_quality_repair_strategy_lines(), | |
| 1331 | 1332 | "- Repair every listed quality target in order before any final answer; " |
| 1332 | 1333 | "do not stop after touching only the first file.", |
| 1333 | 1334 | "- Update the listed generated artifacts directly; do not recreate the " |
@@ -1339,6 +1340,31 @@ def _build_html_quality_repair_guidance( | ||
| 1339 | 1340 | return "\n".join(lines) |
| 1340 | 1341 | |
| 1341 | 1342 | |
| 1343 | +def _html_quality_repair_target_lines( | |
| 1344 | + targets: list[VerificationQualityRepairTarget], | |
| 1345 | +) -> list[str]: | |
| 1346 | + lines: list[str] = [] | |
| 1347 | + visible_targets = targets[:_MAX_HTML_QUALITY_REPAIR_TARGETS] | |
| 1348 | + for target in visible_targets: | |
| 1349 | + lines.append(f"- Improve `{target.artifact_path}`: {target.issue}.") | |
| 1350 | + hidden_count = len(targets) - len(visible_targets) | |
| 1351 | + if hidden_count > 0: | |
| 1352 | + lines.append( | |
| 1353 | + f"- {hidden_count} additional generated artifact(s) also failed quality; " | |
| 1354 | + "repair the listed targets first, then rerun verification for the next batch." | |
| 1355 | + ) | |
| 1356 | + return lines | |
| 1357 | + | |
| 1358 | + | |
| 1359 | +def _html_quality_repair_strategy_lines() -> list[str]: | |
| 1360 | + return [ | |
| 1361 | + "- If a target says thin content, add enough concrete prose to comfortably " | |
| 1362 | + "exceed the expected text-character floor; if it says insufficient " | |
| 1363 | + "structured content, add enough real sections, lists, code, tables, or " | |
| 1364 | + "other content blocks to exceed the block floor.", | |
| 1365 | + ] | |
| 1366 | + | |
| 1367 | + | |
| 1342 | 1368 | def _build_verification_failure_recovery_nudge( |
| 1343 | 1369 | dod: DefinitionOfDone, |
| 1344 | 1370 | *, |
@@ -1392,6 +1418,14 @@ def _build_verification_failure_recovery_nudge( | ||
| 1392 | 1418 | " Use the existing artifact files already on disk as the source of truth: " |
| 1393 | 1419 | f"{preview}." |
| 1394 | 1420 | ) |
| 1421 | + quality_hint = "" | |
| 1422 | + if quality_targets: | |
| 1423 | + quality_hint = ( | |
| 1424 | + f" Verification also identified {len(quality_targets)} generated " | |
| 1425 | + "artifact content-quality target(s); after the reference repair, " | |
| 1426 | + "continue with those quality targets and do not summarize completion " | |
| 1427 | + "while they remain." | |
| 1428 | + ) | |
| 1395 | 1429 | return ( |
| 1396 | 1430 | "Verification already identified the concrete repair target. " |
| 1397 | 1431 | "Do not restart discovery or reread unrelated references. " |
@@ -1400,6 +1434,7 @@ def _build_verification_failure_recovery_nudge( | ||
| 1400 | 1434 | f"`{primary_target.failing_reference}`. " |
| 1401 | 1435 | f"{expected_action[2:] if expected_action.startswith('- ') else expected_action}" |
| 1402 | 1436 | f"{source_hint}" |
| 1437 | + f"{quality_hint}" | |
| 1403 | 1438 | ) |
| 1404 | 1439 | if quality_targets: |
| 1405 | 1440 | primary_target = quality_targets[0] |
src/loader/runtime/safeguard_services.pymodified@@ -800,6 +800,13 @@ class PreActionValidator: | ||
| 800 | 800 | if not html_declared_target_result.valid: |
| 801 | 801 | return html_declared_target_result |
| 802 | 802 | |
| 803 | + html_asset_result = self._validate_html_local_asset_links( | |
| 804 | + str(file_path), | |
| 805 | + str(content), | |
| 806 | + ) | |
| 807 | + if not html_asset_result.valid: | |
| 808 | + return html_asset_result | |
| 809 | + | |
| 803 | 810 | html_root_coverage_result = self._validate_html_root_link_coverage( |
| 804 | 811 | str(file_path), |
| 805 | 812 | str(content), |
@@ -877,6 +884,13 @@ class PreActionValidator: | ||
| 877 | 884 | if not html_declared_target_result.valid: |
| 878 | 885 | return html_declared_target_result |
| 879 | 886 | |
| 887 | + html_asset_result = self._validate_html_local_asset_links( | |
| 888 | + str(file_path), | |
| 889 | + prospective_content, | |
| 890 | + ) | |
| 891 | + if not html_asset_result.valid: | |
| 892 | + return html_asset_result | |
| 893 | + | |
| 880 | 894 | return ValidationResult(valid=True) |
| 881 | 895 | |
| 882 | 896 | def _validate_patch(self, arguments: dict) -> ValidationResult: |
@@ -994,6 +1008,47 @@ class PreActionValidator: | ||
| 994 | 1008 | "\n".join(added_fragments), |
| 995 | 1009 | ) |
| 996 | 1010 | |
| 1011 | + def _validate_html_local_asset_links( | |
| 1012 | + self, | |
| 1013 | + file_path: str, | |
| 1014 | + content: str, | |
| 1015 | + ) -> ValidationResult: | |
| 1016 | + normalized = Path(file_path).expanduser() | |
| 1017 | + if normalized.suffix.lower() not in {".html", ".htm"}: | |
| 1018 | + return ValidationResult(valid=True) | |
| 1019 | + | |
| 1020 | + missing: list[str] = [] | |
| 1021 | + for href, resolved in self._collect_local_href_targets(normalized, content): | |
| 1022 | + target = self._strip_local_href_target(href) | |
| 1023 | + if target is None: | |
| 1024 | + continue | |
| 1025 | + if self._is_local_html_link_target(target): | |
| 1026 | + continue | |
| 1027 | + if not Path(target).suffix: | |
| 1028 | + continue | |
| 1029 | + if resolved.exists(): | |
| 1030 | + continue | |
| 1031 | + if href not in missing: | |
| 1032 | + missing.append(href) | |
| 1033 | + | |
| 1034 | + if not missing: | |
| 1035 | + return ValidationResult(valid=True) | |
| 1036 | + | |
| 1037 | + preview = ", ".join(missing[:3]) | |
| 1038 | + if len(missing) > 3: | |
| 1039 | + preview += ", ..." | |
| 1040 | + return ValidationResult( | |
| 1041 | + valid=False, | |
| 1042 | + reason="HTML local asset references do not exist", | |
| 1043 | + suggestion=( | |
| 1044 | + "Use only existing local assets for non-HTML href values. " | |
| 1045 | + f"Missing local asset href(s): {preview}. Remove the asset link, " | |
| 1046 | + "create the referenced asset first, inline the styling/content, or point " | |
| 1047 | + "the href at an existing local file." | |
| 1048 | + ), | |
| 1049 | + severity="error", | |
| 1050 | + ) | |
| 1051 | + | |
| 997 | 1052 | def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult: |
| 998 | 1053 | path = Path(file_path).expanduser() |
| 999 | 1054 | if path.exists() or not path.suffix or not path.parent.exists(): |
@@ -1377,20 +1432,38 @@ class PreActionValidator: | ||
| 1377 | 1432 | self, |
| 1378 | 1433 | file_path: Path, |
| 1379 | 1434 | content: str, |
| 1435 | + ) -> list[tuple[str, Path]]: | |
| 1436 | + targets: list[tuple[str, Path]] = [] | |
| 1437 | + seen: set[str] = set() | |
| 1438 | + for href, resolved in self._collect_local_href_targets(file_path, content): | |
| 1439 | + target_text = self._strip_local_href_target(href) | |
| 1440 | + if target_text is None or not self._is_local_html_link_target(target_text): | |
| 1441 | + continue | |
| 1442 | + key = f"{target_text}::{resolved}" | |
| 1443 | + if key in seen: | |
| 1444 | + continue | |
| 1445 | + seen.add(key) | |
| 1446 | + targets.append((href, resolved)) | |
| 1447 | + return targets | |
| 1448 | + | |
| 1449 | + def _collect_local_href_targets( | |
| 1450 | + self, | |
| 1451 | + file_path: Path, | |
| 1452 | + content: str, | |
| 1380 | 1453 | ) -> list[tuple[str, Path]]: |
| 1381 | 1454 | pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE) |
| 1382 | 1455 | targets: list[tuple[str, Path]] = [] |
| 1383 | 1456 | seen: set[str] = set() |
| 1384 | 1457 | for href in pattern.findall(content): |
| 1385 | - target_text = href.strip() | |
| 1386 | - if not self._is_local_html_link_target(target_text): | |
| 1458 | + target_text = self._strip_local_href_target(href) | |
| 1459 | + if target_text is None: | |
| 1387 | 1460 | continue |
| 1388 | 1461 | resolved = (file_path.parent / target_text).resolve(strict=False) |
| 1389 | - key = f"{target_text}::{resolved}" | |
| 1462 | + key = f"{href}::{resolved}" | |
| 1390 | 1463 | if key in seen: |
| 1391 | 1464 | continue |
| 1392 | 1465 | seen.add(key) |
| 1393 | - targets.append((target_text, resolved)) | |
| 1466 | + targets.append((href, resolved)) | |
| 1394 | 1467 | return targets |
| 1395 | 1468 | |
| 1396 | 1469 | def _collect_declared_html_targets( |
@@ -1451,15 +1524,20 @@ class PreActionValidator: | ||
| 1451 | 1524 | |
| 1452 | 1525 | @staticmethod |
| 1453 | 1526 | def _is_local_html_link_target(href: str) -> bool: |
| 1527 | + normalized = PreActionValidator._strip_local_href_target(href) | |
| 1528 | + return bool(normalized and normalized.lower().endswith((".html", ".htm"))) | |
| 1529 | + | |
| 1530 | + @staticmethod | |
| 1531 | + def _strip_local_href_target(href: str) -> str | None: | |
| 1454 | 1532 | target = href.strip() |
| 1455 | 1533 | if not target: |
| 1456 | - return False | |
| 1534 | + return None | |
| 1457 | 1535 | if target.startswith(("#", "mailto:", "tel:", "javascript:")): |
| 1458 | - return False | |
| 1536 | + return None | |
| 1459 | 1537 | if "://" in target: |
| 1460 | - return False | |
| 1461 | - normalized = target.split("#", 1)[0].split("?", 1)[0].strip().lower() | |
| 1462 | - return normalized.endswith(".html") | |
| 1538 | + return None | |
| 1539 | + normalized = target.split("#", 1)[0].split("?", 1)[0].strip() | |
| 1540 | + return normalized or None | |
| 1463 | 1541 | |
| 1464 | 1542 | def _suggest_existing_html_targets(self, root: Path, missing: list[str]) -> list[str]: |
| 1465 | 1543 | available_by_directory: dict[Path, list[str]] = {} |
tests/test_finalization.pymodified@@ -495,6 +495,53 @@ def test_verification_repair_guidance_keeps_multi_file_quality_worklist( | ||
| 495 | 495 | assert str(chapter_paths[-1].resolve(strict=False)) in repair.allowed_paths |
| 496 | 496 | |
| 497 | 497 | |
| 498 | +def test_verification_repair_guidance_keeps_quality_targets_with_link_repairs( | |
| 499 | + temp_dir: Path, | |
| 500 | +) -> None: | |
| 501 | + guide = temp_dir / "guides" / "nginx" | |
| 502 | + chapters = guide / "chapters" | |
| 503 | + chapters.mkdir(parents=True) | |
| 504 | + broken_asset_page = chapters / "07-performance.html" | |
| 505 | + thin_page = chapters / "06-security.html" | |
| 506 | + broken_asset_page.write_text('<link rel="stylesheet" href="../styles.css">\n') | |
| 507 | + thin_page.write_text("<h1>Security</h1>\n") | |
| 508 | + dod = create_definition_of_done("Create an equally thorough HTML guide.") | |
| 509 | + dod.evidence = [ | |
| 510 | + VerificationEvidence( | |
| 511 | + command="links", | |
| 512 | + passed=False, | |
| 513 | + output=( | |
| 514 | + "Missing local HTML links:\n" | |
| 515 | + f"{broken_asset_page}:../styles.css -> {guide / 'styles.css'}\n" | |
| 516 | + ), | |
| 517 | + ), | |
| 518 | + VerificationEvidence( | |
| 519 | + command="quality", | |
| 520 | + passed=False, | |
| 521 | + output=( | |
| 522 | + "HTML guide content quality issues:\n" | |
| 523 | + f"{thin_page}: thin content (1348 text chars, expected at least 1758)\n" | |
| 524 | + ), | |
| 525 | + ), | |
| 526 | + ] | |
| 527 | + | |
| 528 | + guidance = _build_verification_repair_guidance( | |
| 529 | + dod, | |
| 530 | + project_root=temp_dir, | |
| 531 | + ) | |
| 532 | + repair = extract_active_repair_context( | |
| 533 | + [Message(role=Role.USER, content=guidance)] | |
| 534 | + ) | |
| 535 | + | |
| 536 | + assert f"Fix the broken local reference `../styles.css` in `{broken_asset_page}`" in guidance | |
| 537 | + assert f"Improve `{thin_page}`: thin content" in guidance | |
| 538 | + assert "continue with the listed content-quality targets" in guidance | |
| 539 | + assert "do not declare completion while any listed quality target remains" in guidance | |
| 540 | + assert repair is not None | |
| 541 | + assert repair.artifact_path == str(broken_asset_page.resolve(strict=False)) | |
| 542 | + assert str(thin_page.resolve(strict=False)) in repair.allowed_paths | |
| 543 | + | |
| 544 | + | |
| 498 | 545 | @pytest.mark.asyncio |
| 499 | 546 | async def test_turn_finalizer_records_skipped_verification_observation( |
| 500 | 547 | temp_dir: Path, |
tests/test_safeguard_services.pymodified@@ -417,6 +417,50 @@ def test_pre_action_validator_blocks_placeholder_html_patch(tmp_path: Path) -> N | ||
| 417 | 417 | assert "coming soon" in result.suggestion |
| 418 | 418 | |
| 419 | 419 | |
| 420 | +def test_pre_action_validator_blocks_missing_local_html_asset_href(tmp_path: Path) -> None: | |
| 421 | + validator = PreActionValidator() | |
| 422 | + page = tmp_path / "guide" / "chapters" / "07-performance.html" | |
| 423 | + | |
| 424 | + result = validator.validate( | |
| 425 | + "write", | |
| 426 | + { | |
| 427 | + "file_path": str(page), | |
| 428 | + "content": ( | |
| 429 | + '<html><head><link rel="stylesheet" href="../styles.css"></head>' | |
| 430 | + "<body><h1>Performance</h1><p>Concrete content.</p></body></html>" | |
| 431 | + ), | |
| 432 | + }, | |
| 433 | + ) | |
| 434 | + | |
| 435 | + assert result.valid is False | |
| 436 | + assert result.reason == "HTML local asset references do not exist" | |
| 437 | + assert "../styles.css" in result.suggestion | |
| 438 | + assert "create the referenced asset first" in result.suggestion | |
| 439 | + | |
| 440 | + | |
| 441 | +def test_pre_action_validator_allows_existing_local_html_asset_href( | |
| 442 | + tmp_path: Path, | |
| 443 | +) -> None: | |
| 444 | + validator = PreActionValidator() | |
| 445 | + guide = tmp_path / "guide" | |
| 446 | + chapters = guide / "chapters" | |
| 447 | + chapters.mkdir(parents=True) | |
| 448 | + (guide / "styles.css").write_text("body { color: #222; }\n") | |
| 449 | + | |
| 450 | + result = validator.validate( | |
| 451 | + "write", | |
| 452 | + { | |
| 453 | + "file_path": str(chapters / "07-performance.html"), | |
| 454 | + "content": ( | |
| 455 | + '<html><head><link rel="stylesheet" href="../styles.css"></head>' | |
| 456 | + "<body><h1>Performance</h1><p>Concrete content.</p></body></html>" | |
| 457 | + ), | |
| 458 | + }, | |
| 459 | + ) | |
| 460 | + | |
| 461 | + assert result.valid is True | |
| 462 | + | |
| 463 | + | |
| 420 | 464 | def test_pre_action_validator_blocks_shell_text_rewrite_for_html_target() -> None: |
| 421 | 465 | validator = PreActionValidator() |
| 422 | 466 | |