@@ -456,6 +456,9 @@ contains |
| 456 | end do | 456 | end do |
| 457 | end do | 457 | end do |
| 458 | | 458 | |
| | 459 | + ! Minimize DFA to reduce state count |
| | 460 | + call minimize_dfa(opt%dfa) |
| | 461 | + |
| 459 | opt%dfa%compiled = .true. | 462 | opt%dfa%compiled = .true. |
| 460 | opt%use_dfa = .true. | 463 | opt%use_dfa = .true. |
| 461 | | 464 | |
@@ -498,6 +501,240 @@ contains |
| 498 | | 501 | |
| 499 | end function find_or_create_dfa_state | 502 | end function find_or_create_dfa_state |
| 500 | | 503 | |
subroutine minimize_dfa(dfa)
    !> Minimize a compiled DFA by merging behaviourally equivalent states.
    !>
    !> Worklist-driven partition refinement (Hopcroft-style). States start out
    !> split into an accepting block and a non-accepting block. Each block popped
    !> from the worklist is used as a "splitter": for every input byte, any block
    !> that contains both states that step into the splitter and states that do
    !> not is cut in two. When the worklist drains, states left in the same block
    !> are merged into a single state.
    !>
    !> The transition table is partial: a target outside 1..num_states is treated
    !> as "dead" and is not a member of any block. Hopcroft's "queue only the
    !> smaller half" shortcut assumes a complete DFA, so it is deliberately NOT
    !> used here. Every initial block is queued and both halves of every split
    !> are (re)queued. This guarantees that a state stepping to a live state is
    !> never merged with a state that goes dead on the same byte.
    !>
    !> On return, if any states were merged, dfa%states is reallocated to the
    !> reduced size and dfa%num_states / dfa%start_state are updated. If nothing
    !> can be merged (or there is at most one state) dfa is left untouched.
    type(compiled_dfa_t), intent(inout) :: dfa

    integer, parameter :: first_byte = 0, last_byte = 255  ! transition index range

    integer :: num_states, num_partitions
    integer, allocatable :: partition(:)        ! partition(state) = block ID
    integer, allocatable :: representative(:)   ! lowest-numbered state of each block
    integer, allocatable :: new_state_id(:)     ! old state index -> new state index
    type(dfa_state_t), allocatable :: new_states(:)

    logical, allocatable :: in_worklist(:)      ! is block currently queued?
    integer, allocatable :: worklist(:)         ! FIFO of block IDs to use as splitters
    integer :: work_head, work_tail

    logical, allocatable :: hits_splitter(:)    ! state steps into the splitter on byte c
    integer :: i, c, state, target, part_id
    integer :: num_accept, num_reject
    integer :: num_hit, num_miss
    integer :: old_part, new_part_id, last_part
    integer :: new_num_states

    num_states = dfa%num_states
    if (num_states <= 1) return   ! nothing to minimize

    ! Working storage. These are local allocatables, so they are released
    ! automatically on every return path.
    allocate(partition(num_states))
    allocate(representative(num_states))
    allocate(new_state_id(num_states))
    allocate(in_worklist(num_states))
    allocate(hits_splitter(num_states))
    ! Queue capacity: at most 2 initial pushes, and every split (there can be
    ! at most num_states - 1 of them) pushes at most 2 entries.
    allocate(worklist(2 * num_states))

    ! Initial partition: accepting states -> block 1, the rest -> block 2.
    num_accept = 0
    num_reject = 0
    do i = 1, num_states
        if (dfa%states(i)%is_accept) then
            partition(i) = 1
            num_accept = num_accept + 1
        else
            partition(i) = 2
            num_reject = num_reject + 1
        end if
    end do

    if (num_accept == 0 .or. num_reject == 0) then
        ! Only one kind of state: a single block holding everything.
        num_partitions = 1
        partition = 1
    else
        num_partitions = 2
    end if

    ! Queue every initial block. Even a lone block must be used as a splitter,
    ! because states that go dead on a byte differ from states that stay live.
    in_worklist = .false.
    work_head = 1
    work_tail = 0
    do part_id = 1, num_partitions
        work_tail = work_tail + 1
        worklist(work_tail) = part_id
        in_worklist(part_id) = .true.
    end do

    ! Main refinement loop
    do while (work_head <= work_tail)
        part_id = worklist(work_head)
        work_head = work_head + 1
        in_worklist(part_id) = .false.

        do c = first_byte, last_byte
            ! Mark the states whose transition on c lands inside the splitter.
            ! The range test is nested (not combined with .and.) because Fortran
            ! does not guarantee short-circuit evaluation.
            do state = 1, num_states
                hits_splitter(state) = .false.
                target = dfa%states(state)%transitions(c)
                if (target > 0 .and. target <= num_states) then
                    hits_splitter(state) = (partition(target) == part_id)
                end if
            end do

            ! Nothing reaches the splitter on this byte: no block can split.
            if (.not. any(hits_splitter)) cycle

            ! Blocks created during this pass are uniform with respect to the
            ! current marker, so only the blocks that existed on entry are
            ! examined.
            last_part = num_partitions
            do old_part = 1, last_part
                num_hit  = count(partition == old_part .and. hits_splitter)
                num_miss = count(partition == old_part .and. .not. hits_splitter)
                if (num_hit == 0 .or. num_miss == 0) cycle

                ! Carve the states that reach the splitter out into a new block.
                num_partitions = num_partitions + 1
                new_part_id = num_partitions
                where (partition == old_part .and. hits_splitter) partition = new_part_id

                ! Both halves must (again) be tried as splitters.
                work_tail = work_tail + 1
                worklist(work_tail) = new_part_id
                in_worklist(new_part_id) = .true.
                if (.not. in_worklist(old_part)) then
                    work_tail = work_tail + 1
                    worklist(work_tail) = old_part
                    in_worklist(old_part) = .true.
                end if
            end do
        end do
    end do

    ! Every state ended up alone in its block: no reduction possible.
    if (num_partitions >= num_states) return

    ! Number the merged states in order of first appearance, so the block that
    ! contains old state 1 becomes new state 1, and remember the lowest-numbered
    ! member of each block as its representative.
    representative = 0
    new_state_id = 0
    new_num_states = 0
    do state = 1, num_states
        part_id = partition(state)
        if (representative(part_id) == 0) then
            new_num_states = new_num_states + 1
            representative(part_id) = state
            new_state_id(state) = new_num_states
        else
            new_state_id(state) = new_state_id(representative(part_id))
        end if
    end do

    ! Build the minimized state table from the representatives.
    allocate(new_states(new_num_states))

    do part_id = 1, num_partitions
        state = representative(part_id)
        if (state == 0) cycle   ! defensive: blocks are never empty

        i = new_state_id(state)
        new_states(i)%is_accept = dfa%states(state)%is_accept
        new_states(i)%state_hash = dfa%states(state)%state_hash
        new_states(i)%nfa_states = dfa%states(state)%nfa_states

        ! Remap transitions into the new numbering; anything that was not a
        ! valid state index becomes the dead state.
        do c = first_byte, last_byte
            target = dfa%states(state)%transitions(c)
            if (target > 0 .and. target <= num_states) then
                new_states(i)%transitions(c) = new_state_id(target)
            else
                new_states(i)%transitions(c) = DFA_DEAD_STATE
            end if
        end do
    end do

    ! Install the minimized table.
    deallocate(dfa%states)
    allocate(dfa%states(new_num_states))
    dfa%states = new_states
    ! Only remap a start state that actually indexes the old table.
    if (dfa%start_state >= 1 .and. dfa%start_state <= num_states) then
        dfa%start_state = new_state_id(dfa%start_state)
    end if
    dfa%num_states = new_num_states

end subroutine minimize_dfa
| | 737 | + |
| 501 | subroutine compute_char_transitions_simple(nfa, current, c, next_set) | 738 | subroutine compute_char_transitions_simple(nfa, current, c, next_set) |
| 502 | !> Compute character transitions without case folding (for DFA compilation) | 739 | !> Compute character transitions without case folding (for DFA compilation) |
| 503 | type(nfa_t), intent(in) :: nfa | 740 | type(nfa_t), intent(in) :: nfa |