tenseleyflow/loader / ffab688

Browse files

Validate generated HTML asset links

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
ffab6888cb582f29553ee3c5972bdb23b766fa74
Parents
2c722c3
Tree
039a614

4 changed files

StatusFile+-
M src/loader/runtime/finalization.py 48 13
M src/loader/runtime/safeguard_services.py 87 9
M tests/test_finalization.py 47 0
M tests/test_safeguard_services.py 44 0
src/loader/runtime/finalization.pymodified
@@ -1218,6 +1218,8 @@ def _build_verification_repair_guidance(
12181218
 
12191219
     lines = ["Repair focus:"]
12201220
     lines.extend(f"- {item}" for item in fixes)
1221
+    if quality_targets:
1222
+        lines.extend(_html_quality_repair_target_lines(quality_targets))
12211223
     primary_target = repair_targets[0] if repair_targets else None
12221224
     if primary_target is not None:
12231225
         expected_action = _repair_expected_action_line(
@@ -1241,6 +1243,16 @@ def _build_verification_repair_guidance(
12411243
                 ),
12421244
                 "- Do not reread unrelated reference materials or restart discovery "
12431245
                 "while this concrete repair target is unresolved.",
1246
+                *(
1247
+                    [
1248
+                        "- After the concrete reference repair passes, continue with the "
1249
+                        "listed content-quality targets; do not declare completion while "
1250
+                        "any listed quality target remains.",
1251
+                        *_html_quality_repair_strategy_lines(),
1252
+                    ]
1253
+                    if quality_targets
1254
+                    else []
1255
+                ),
12441256
             ]
12451257
         )
12461258
     else:
@@ -1310,24 +1322,13 @@ def _build_html_quality_repair_guidance(
13101322
     targets: list[VerificationQualityRepairTarget],
13111323
 ) -> str:
13121324
     lines = ["Repair focus:"]
1313
-    visible_targets = targets[:_MAX_HTML_QUALITY_REPAIR_TARGETS]
1314
-    for target in visible_targets:
1315
-        lines.append(f"- Improve `{target.artifact_path}`: {target.issue}.")
1316
-    hidden_count = len(targets) - len(visible_targets)
1317
-    if hidden_count > 0:
1318
-        lines.append(
1319
-            f"- {hidden_count} additional generated artifact(s) also failed quality; "
1320
-            "repair the listed targets first, then rerun verification for the next batch."
1321
-        )
1325
+    lines.extend(_html_quality_repair_target_lines(targets))
13221326
     primary = targets[0]
13231327
     lines.extend(
13241328
         [
13251329
             f"- Immediate next step: edit `{primary.artifact_path}` with a substantial "
13261330
             "expansion or replacement that satisfies its listed quality issue.",
1327
-            "- If a target says thin content, add enough concrete prose to comfortably "
1328
-            "exceed the expected text-character floor; if it says insufficient "
1329
-            "structured content, add enough real sections, lists, code, tables, or "
1330
-            "other content blocks to exceed the block floor.",
1331
+            *_html_quality_repair_strategy_lines(),
13311332
             "- Repair every listed quality target in order before any final answer; "
13321333
             "do not stop after touching only the first file.",
13331334
             "- Update the listed generated artifacts directly; do not recreate the "
@@ -1339,6 +1340,31 @@ def _build_html_quality_repair_guidance(
13391340
     return "\n".join(lines)
13401341
 
13411342
 
1343
+def _html_quality_repair_target_lines(
1344
+    targets: list[VerificationQualityRepairTarget],
1345
+) -> list[str]:
1346
+    lines: list[str] = []
1347
+    visible_targets = targets[:_MAX_HTML_QUALITY_REPAIR_TARGETS]
1348
+    for target in visible_targets:
1349
+        lines.append(f"- Improve `{target.artifact_path}`: {target.issue}.")
1350
+    hidden_count = len(targets) - len(visible_targets)
1351
+    if hidden_count > 0:
1352
+        lines.append(
1353
+            f"- {hidden_count} additional generated artifact(s) also failed quality; "
1354
+            "repair the listed targets first, then rerun verification for the next batch."
1355
+        )
1356
+    return lines
1357
+
1358
+
1359
+def _html_quality_repair_strategy_lines() -> list[str]:
1360
+    return [
1361
+        "- If a target says thin content, add enough concrete prose to comfortably "
1362
+        "exceed the expected text-character floor; if it says insufficient "
1363
+        "structured content, add enough real sections, lists, code, tables, or "
1364
+        "other content blocks to exceed the block floor.",
1365
+    ]
1366
+
1367
+
13421368
 def _build_verification_failure_recovery_nudge(
13431369
     dod: DefinitionOfDone,
13441370
     *,
@@ -1392,6 +1418,14 @@ def _build_verification_failure_recovery_nudge(
13921418
                 " Use the existing artifact files already on disk as the source of truth: "
13931419
                 f"{preview}."
13941420
             )
1421
+        quality_hint = ""
1422
+        if quality_targets:
1423
+            quality_hint = (
1424
+                f" Verification also identified {len(quality_targets)} generated "
1425
+                "artifact content-quality target(s); after the reference repair, "
1426
+                "continue with those quality targets and do not summarize completion "
1427
+                "while they remain."
1428
+            )
13951429
         return (
13961430
             "Verification already identified the concrete repair target. "
13971431
             "Do not restart discovery or reread unrelated references. "
@@ -1400,6 +1434,7 @@ def _build_verification_failure_recovery_nudge(
14001434
             f"`{primary_target.failing_reference}`. "
14011435
             f"{expected_action[2:] if expected_action.startswith('- ') else expected_action}"
14021436
             f"{source_hint}"
1437
+            f"{quality_hint}"
14031438
         )
14041439
     if quality_targets:
14051440
         primary_target = quality_targets[0]
src/loader/runtime/safeguard_services.pymodified
@@ -800,6 +800,13 @@ class PreActionValidator:
800800
         if not html_declared_target_result.valid:
801801
             return html_declared_target_result
802802
 
803
+        html_asset_result = self._validate_html_local_asset_links(
804
+            str(file_path),
805
+            str(content),
806
+        )
807
+        if not html_asset_result.valid:
808
+            return html_asset_result
809
+
803810
         html_root_coverage_result = self._validate_html_root_link_coverage(
804811
             str(file_path),
805812
             str(content),
@@ -877,6 +884,13 @@ class PreActionValidator:
877884
         if not html_declared_target_result.valid:
878885
             return html_declared_target_result
879886
 
887
+        html_asset_result = self._validate_html_local_asset_links(
888
+            str(file_path),
889
+            prospective_content,
890
+        )
891
+        if not html_asset_result.valid:
892
+            return html_asset_result
893
+
880894
         return ValidationResult(valid=True)
881895
 
882896
     def _validate_patch(self, arguments: dict) -> ValidationResult:
@@ -994,6 +1008,47 @@ class PreActionValidator:
9941008
             "\n".join(added_fragments),
9951009
         )
9961010
 
1011
+    def _validate_html_local_asset_links(
1012
+        self,
1013
+        file_path: str,
1014
+        content: str,
1015
+    ) -> ValidationResult:
1016
+        normalized = Path(file_path).expanduser()
1017
+        if normalized.suffix.lower() not in {".html", ".htm"}:
1018
+            return ValidationResult(valid=True)
1019
+
1020
+        missing: list[str] = []
1021
+        for href, resolved in self._collect_local_href_targets(normalized, content):
1022
+            target = self._strip_local_href_target(href)
1023
+            if target is None:
1024
+                continue
1025
+            if self._is_local_html_link_target(target):
1026
+                continue
1027
+            if not Path(target).suffix:
1028
+                continue
1029
+            if resolved.exists():
1030
+                continue
1031
+            if href not in missing:
1032
+                missing.append(href)
1033
+
1034
+        if not missing:
1035
+            return ValidationResult(valid=True)
1036
+
1037
+        preview = ", ".join(missing[:3])
1038
+        if len(missing) > 3:
1039
+            preview += ", ..."
1040
+        return ValidationResult(
1041
+            valid=False,
1042
+            reason="HTML local asset references do not exist",
1043
+            suggestion=(
1044
+                "Use only existing local assets for non-HTML href values. "
1045
+                f"Missing local asset href(s): {preview}. Remove the asset link, "
1046
+                "create the referenced asset first, inline the styling/content, or point "
1047
+                "the href at an existing local file."
1048
+            ),
1049
+            severity="error",
1050
+        )
1051
+
9971052
     def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult:
9981053
         path = Path(file_path).expanduser()
9991054
         if path.exists() or not path.suffix or not path.parent.exists():
@@ -1377,20 +1432,38 @@ class PreActionValidator:
13771432
         self,
13781433
         file_path: Path,
13791434
         content: str,
1435
+    ) -> list[tuple[str, Path]]:
1436
+        targets: list[tuple[str, Path]] = []
1437
+        seen: set[str] = set()
1438
+        for href, resolved in self._collect_local_href_targets(file_path, content):
1439
+            target_text = self._strip_local_href_target(href)
1440
+            if target_text is None or not self._is_local_html_link_target(target_text):
1441
+                continue
1442
+            key = f"{target_text}::{resolved}"
1443
+            if key in seen:
1444
+                continue
1445
+            seen.add(key)
1446
+            targets.append((href, resolved))
1447
+        return targets
1448
+
1449
+    def _collect_local_href_targets(
1450
+        self,
1451
+        file_path: Path,
1452
+        content: str,
13801453
     ) -> list[tuple[str, Path]]:
13811454
         pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
13821455
         targets: list[tuple[str, Path]] = []
13831456
         seen: set[str] = set()
13841457
         for href in pattern.findall(content):
1385
-            target_text = href.strip()
1386
-            if not self._is_local_html_link_target(target_text):
1458
+            target_text = self._strip_local_href_target(href)
1459
+            if target_text is None:
13871460
                 continue
13881461
             resolved = (file_path.parent / target_text).resolve(strict=False)
1389
-            key = f"{target_text}::{resolved}"
1462
+            key = f"{href}::{resolved}"
13901463
             if key in seen:
13911464
                 continue
13921465
             seen.add(key)
1393
-            targets.append((target_text, resolved))
1466
+            targets.append((href, resolved))
13941467
         return targets
13951468
 
13961469
     def _collect_declared_html_targets(
@@ -1451,15 +1524,20 @@ class PreActionValidator:
14511524
 
14521525
     @staticmethod
14531526
     def _is_local_html_link_target(href: str) -> bool:
1527
+        normalized = PreActionValidator._strip_local_href_target(href)
1528
+        return bool(normalized and normalized.lower().endswith((".html", ".htm")))
1529
+
1530
+    @staticmethod
1531
+    def _strip_local_href_target(href: str) -> str | None:
14541532
         target = href.strip()
14551533
         if not target:
1456
-            return False
1534
+            return None
14571535
         if target.startswith(("#", "mailto:", "tel:", "javascript:")):
1458
-            return False
1536
+            return None
14591537
         if "://" in target:
1460
-            return False
1461
-        normalized = target.split("#", 1)[0].split("?", 1)[0].strip().lower()
1462
-        return normalized.endswith(".html")
1538
+            return None
1539
+        normalized = target.split("#", 1)[0].split("?", 1)[0].strip()
1540
+        return normalized or None
14631541
 
14641542
     def _suggest_existing_html_targets(self, root: Path, missing: list[str]) -> list[str]:
14651543
         available_by_directory: dict[Path, list[str]] = {}
tests/test_finalization.pymodified
@@ -495,6 +495,53 @@ def test_verification_repair_guidance_keeps_multi_file_quality_worklist(
495495
     assert str(chapter_paths[-1].resolve(strict=False)) in repair.allowed_paths
496496
 
497497
 
498
+def test_verification_repair_guidance_keeps_quality_targets_with_link_repairs(
499
+    temp_dir: Path,
500
+) -> None:
501
+    guide = temp_dir / "guides" / "nginx"
502
+    chapters = guide / "chapters"
503
+    chapters.mkdir(parents=True)
504
+    broken_asset_page = chapters / "07-performance.html"
505
+    thin_page = chapters / "06-security.html"
506
+    broken_asset_page.write_text('<link rel="stylesheet" href="../styles.css">\n')
507
+    thin_page.write_text("<h1>Security</h1>\n")
508
+    dod = create_definition_of_done("Create an equally thorough HTML guide.")
509
+    dod.evidence = [
510
+        VerificationEvidence(
511
+            command="links",
512
+            passed=False,
513
+            output=(
514
+                "Missing local HTML links:\n"
515
+                f"{broken_asset_page}:../styles.css -> {guide / 'styles.css'}\n"
516
+            ),
517
+        ),
518
+        VerificationEvidence(
519
+            command="quality",
520
+            passed=False,
521
+            output=(
522
+                "HTML guide content quality issues:\n"
523
+                f"{thin_page}: thin content (1348 text chars, expected at least 1758)\n"
524
+            ),
525
+        ),
526
+    ]
527
+
528
+    guidance = _build_verification_repair_guidance(
529
+        dod,
530
+        project_root=temp_dir,
531
+    )
532
+    repair = extract_active_repair_context(
533
+        [Message(role=Role.USER, content=guidance)]
534
+    )
535
+
536
+    assert f"Fix the broken local reference `../styles.css` in `{broken_asset_page}`" in guidance
537
+    assert f"Improve `{thin_page}`: thin content" in guidance
538
+    assert "continue with the listed content-quality targets" in guidance
539
+    assert "do not declare completion while any listed quality target remains" in guidance
540
+    assert repair is not None
541
+    assert repair.artifact_path == str(broken_asset_page.resolve(strict=False))
542
+    assert str(thin_page.resolve(strict=False)) in repair.allowed_paths
543
+
544
+
498545
 @pytest.mark.asyncio
499546
 async def test_turn_finalizer_records_skipped_verification_observation(
500547
     temp_dir: Path,
tests/test_safeguard_services.pymodified
@@ -417,6 +417,50 @@ def test_pre_action_validator_blocks_placeholder_html_patch(tmp_path: Path) -> N
417417
     assert "coming soon" in result.suggestion
418418
 
419419
 
420
+def test_pre_action_validator_blocks_missing_local_html_asset_href(tmp_path: Path) -> None:
421
+    validator = PreActionValidator()
422
+    page = tmp_path / "guide" / "chapters" / "07-performance.html"
423
+
424
+    result = validator.validate(
425
+        "write",
426
+        {
427
+            "file_path": str(page),
428
+            "content": (
429
+                '<html><head><link rel="stylesheet" href="../styles.css"></head>'
430
+                "<body><h1>Performance</h1><p>Concrete content.</p></body></html>"
431
+            ),
432
+        },
433
+    )
434
+
435
+    assert result.valid is False
436
+    assert result.reason == "HTML local asset references do not exist"
437
+    assert "../styles.css" in result.suggestion
438
+    assert "create the referenced asset first" in result.suggestion
439
+
440
+
441
+def test_pre_action_validator_allows_existing_local_html_asset_href(
442
+    tmp_path: Path,
443
+) -> None:
444
+    validator = PreActionValidator()
445
+    guide = tmp_path / "guide"
446
+    chapters = guide / "chapters"
447
+    chapters.mkdir(parents=True)
448
+    (guide / "styles.css").write_text("body { color: #222; }\n")
449
+
450
+    result = validator.validate(
451
+        "write",
452
+        {
453
+            "file_path": str(chapters / "07-performance.html"),
454
+            "content": (
455
+                '<html><head><link rel="stylesheet" href="../styles.css"></head>'
456
+                "<body><h1>Performance</h1><p>Concrete content.</p></body></html>"
457
+            ),
458
+        },
459
+    )
460
+
461
+    assert result.valid is True
462
+
463
+
420464
 def test_pre_action_validator_blocks_shell_text_rewrite_for_html_target() -> None:
421465
     validator = PreActionValidator()
422466