@@ -456,6 +456,9 @@ contains |
| 456 | 456 | end do |
| 457 | 457 | end do |
| 458 | 458 | |
| 459 | + ! Minimize DFA to reduce state count |
| 460 | + call minimize_dfa(opt%dfa) |
| 461 | + |
| 459 | 462 | opt%dfa%compiled = .true. |
| 460 | 463 | opt%use_dfa = .true. |
| 461 | 464 | |
@@ -498,6 +501,240 @@ contains |
| 498 | 501 | |
| 499 | 502 | end function find_or_create_dfa_state |
| 500 | 503 | |
subroutine minimize_dfa(dfa)
    !> Minimize a compiled DFA by merging equivalent states.
    !>
    !> States are first split into accepting / non-accepting partitions. The
    !> partitions are then refined until, for every input byte 0..255, all
    !> states of a partition either lead into the same partition or all have
    !> no transition. One representative (the lowest-numbered state) is kept
    !> per final partition and all transitions are remapped onto the new IDs.
    !>
    !> Transition targets outside 1..num_states are treated as "no transition".
    !> That implicit dead state is not a member of any partition and is never
    !> used as a splitter, so Hopcroft's "enqueue only the smaller half"
    !> shortcut (which assumes a complete transition function) is NOT valid
    !> here: it can leave a state that reaches an accepting state on some byte
    !> merged with one that has no transition on that byte. Instead every
    !> partition is (re)queued whenever it is created or changed, which yields
    !> the stable partition regardless of missing transitions.
    !>
    !> dfa: minimized in place (states, start_state, num_states). Left
    !>      untouched when it has at most one state or nothing can be merged.
    type(compiled_dfa_t), intent(inout) :: dfa

    integer :: num_states, num_partitions
    integer, allocatable :: partition(:)       ! partition(state) = partition ID
    integer, allocatable :: representative(:)  ! lowest-numbered state of each partition
    integer, allocatable :: worklist(:)        ! FIFO queue of partitions still to be used as splitters
    logical, allocatable :: in_worklist(:)     ! is the partition currently queued?
    logical, allocatable :: hits(:)            ! hits(state): state moves into the splitter on byte c
    type(dfa_state_t), allocatable :: new_states(:)

    integer :: work_head, work_tail
    integer :: c, state, target, part_id
    integer :: old_part, new_part_id, parts_before
    integer :: num_accept, num_hit, num_miss

    num_states = dfa%num_states
    if (num_states <= 1) return  ! Nothing to minimize

    allocate(partition(num_states))
    allocate(representative(num_states))
    allocate(in_worklist(num_states))
    allocate(hits(num_states))
    ! The queue is linear (never wraps). At most 2 initial pushes plus at most
    ! 2 pushes per split, and there can be at most num_states - 1 splits.
    allocate(worklist(2 * num_states))

    ! Initial partition: accepting states = 1, non-accepting states = 2
    num_accept = 0
    do state = 1, num_states
        if (dfa%states(state)%is_accept) then
            partition(state) = 1
            num_accept = num_accept + 1
        else
            partition(state) = 2
        end if
    end do

    if (num_accept == 0 .or. num_accept == num_states) then
        ! Only one kind of state: a single initial partition. It still has to
        ! be refined, because states may differ in which bytes have a
        ! transition at all (e.g. an all-accepting DFA for "a?").
        partition = 1
        num_partitions = 1
    else
        num_partitions = 2
    end if

    ! Seed the worklist with every initial partition
    in_worklist = .false.
    work_head = 1
    work_tail = 0
    do part_id = 1, num_partitions
        work_tail = work_tail + 1
        worklist(work_tail) = part_id
        in_worklist(part_id) = .true.
    end do

    ! Main refinement loop
    do while (work_head <= work_tail)
        part_id = worklist(work_head)
        work_head = work_head + 1
        in_worklist(part_id) = .false.

        do c = 0, 255
            ! Mark the states whose transition on byte c enters the splitter
            hits = .false.
            do state = 1, num_states
                target = dfa%states(state)%transitions(c)
                if (target > 0 .and. target <= num_states) then
                    if (partition(target) == part_id) hits(state) = .true.
                end if
            end do
            if (.not. any(hits)) cycle  ! nothing reaches the splitter on c

            ! Partitions created during this pass are halves that are already
            ! uniform with respect to hits, so only pre-existing ones are visited.
            parts_before = num_partitions
            do old_part = 1, parts_before
                num_hit  = count(partition == old_part .and. hits)
                num_miss = count(partition == old_part .and. .not. hits)
                if (num_hit == 0 .or. num_miss == 0) cycle

                ! Split: states entering the splitter move to a new partition
                num_partitions = num_partitions + 1
                new_part_id = num_partitions
                where (partition == old_part .and. hits) partition = new_part_id

                ! Both halves must (still) be processed as splitters
                if (.not. in_worklist(old_part)) then
                    work_tail = work_tail + 1
                    worklist(work_tail) = old_part
                    in_worklist(old_part) = .true.
                end if
                work_tail = work_tail + 1
                worklist(work_tail) = new_part_id
                in_worklist(new_part_id) = .true.
            end do
        end do
    end do

    ! No two states are equivalent: keep the DFA as it is. Local allocatables
    ! are released automatically on return.
    if (num_partitions >= num_states) return

    ! Representative of each partition = its lowest-numbered state
    ! (descending scan so the smallest index is written last).
    representative = 0
    do state = num_states, 1, -1
        representative(partition(state)) = state
    end do

    ! Build the minimized DFA. Partitions are never empty and are numbered
    ! 1..num_partitions, so the partition ID doubles as the new state ID.
    allocate(new_states(num_partitions))

    do part_id = 1, num_partitions
        state = representative(part_id)

        ! NOTE(review): only these components are carried over; confirm that
        ! dfa_state_t has no other fields that must survive minimization.
        new_states(part_id)%is_accept  = dfa%states(state)%is_accept
        new_states(part_id)%state_hash = dfa%states(state)%state_hash
        new_states(part_id)%nfa_states = dfa%states(state)%nfa_states

        ! Remap transitions onto the new state IDs
        do c = 0, 255
            target = dfa%states(state)%transitions(c)
            if (target > 0 .and. target <= num_states) then
                new_states(part_id)%transitions(c) = partition(target)
            else
                new_states(part_id)%transitions(c) = DFA_DEAD_STATE
            end if
        end do
    end do

    ! Remap the start state before the old numbering is discarded
    if (dfa%start_state >= 1 .and. dfa%start_state <= num_states) then
        dfa%start_state = partition(dfa%start_state)
    end if

    ! Replace the state table with the minimized version
    deallocate(dfa%states)
    allocate(dfa%states(num_partitions))
    dfa%states = new_states
    dfa%num_states = num_partitions

end subroutine minimize_dfa
| 737 | + |
| 501 | 738 | subroutine compute_char_transitions_simple(nfa, current, c, next_set) |
| 502 | 739 | !> Compute character transitions without case folding (for DFA compilation) |
| 503 | 740 | type(nfa_t), intent(in) :: nfa |