module regex_engine
  !> NFA simulation engine using Thompson's algorithm
  !> Tracks set of current states, processes input character by character
  use regex_types
  implicit none
  private

  public :: nfa_match, nfa_search

  integer, parameter :: MAX_STATES = 1024

contains

  function nfa_match(nfa, text, start_pos, ignore_case) result(res)
    !> Try to match NFA starting at start_pos in text
    !> Returns match result with positions if successful
    type(nfa_t), intent(in) :: nfa
    character(len=*), intent(in) :: text
    integer, intent(in) :: start_pos
    logical, intent(in) :: ignore_case
    type(match_result_t) :: res

    integer :: current(MAX_STATES), next_states(MAX_STATES)
    integer :: num_current, num_next
    integer :: i, pos, text_len
    character(len=1) :: c

    res%matched = .false.
    text_len = len_trim(text)

    if (nfa%num_states == 0) return

    ! Initialize with epsilon closure of start state
    num_current = 0
    call epsilon_closure(nfa, nfa%start_state, current, num_current, &
                         text, start_pos, text_len)

    ! Check if already accepting (empty pattern case)
    if (is_accepting(nfa, current, num_current)) then
      res%matched = .true.
      res%match_start = start_pos
      res%match_end = start_pos - 1  ! Empty match
      return
    end if

    ! Process each character
    pos = start_pos
    do while (pos <= text_len .and. num_current > 0)
      c = text(pos:pos)

      ! Compute next states
      num_next = 0
      do i = 1, num_current
        call step(nfa, current(i), c, pos, text, text_len, ignore_case, &
                  next_states, num_next)
      end do

      ! Compute epsilon closure of next states
      num_current = 0
      do i = 1, num_next
        call epsilon_closure(nfa, next_states(i), current, num_current, &
                             text, pos + 1, text_len)
      end do

      pos = pos + 1

      ! Check for acceptance
      if (is_accepting(nfa, current, num_current)) then
        res%matched = .true.
        res%match_start = start_pos
        res%match_end = pos - 1
        ! Don't return yet - try to find longest match (greedy)
      end if
    end do

  end function nfa_match

  function nfa_search(nfa, text, ignore_case) result(res)
    !> Search for pattern anywhere in text
    type(nfa_t), intent(in) :: nfa
    character(len=*), intent(in) :: text
    logical, intent(in) :: ignore_case
    type(match_result_t) :: res

    integer :: i, text_len
    type(match_result_t) :: try_res

    res%matched = .false.
    text_len = len_trim(text)

    ! Try matching at each position
    do i = 1, text_len + 1  ! +1 to handle empty string match at end
      try_res = nfa_match(nfa, text, i, ignore_case)
      if (try_res%matched) then
        res = try_res
        return
      end if
    end do

  end function nfa_search

  subroutine epsilon_closure(nfa, state, states, num_states, text, pos, text_len)
    !> Compute epsilon closure of a state, adding to states array
    type(nfa_t), intent(in) :: nfa
    integer, intent(in) :: state
    integer, intent(inout) :: states(MAX_STATES)
    integer, intent(inout) :: num_states
    character(len=*), intent(in) :: text
    integer, intent(in) :: pos, text_len

    integer :: stack(MAX_STATES), stack_top
    integer :: current, i, target
    type(nfa_transition_t) :: trans
    logical :: visited(MAX_STATES)

    visited = .false.
    stack_top = 1
    stack(1) = state

    do while (stack_top > 0)
      current = stack(stack_top)
      stack_top = stack_top - 1

      if (current < 1 .or. current > nfa%num_states) cycle
      if (visited(current)) cycle
      visited(current) = .true.

      ! Add to result
      if (num_states < MAX_STATES) then
        num_states = num_states + 1
        states(num_states) = current
      end if

      ! Follow epsilon transitions and anchors
      do i = 1, nfa%states(current)%num_trans
        trans = nfa%states(current)%trans(i)

        if (trans%trans_type == TRANS_EPSILON) then
          ! Check for backref marker (negative anchor_type)
          if (trans%anchor_type < 0) then
            ! Backref - skip for now (would need captured text)
            cycle
          end if

          target = trans%target
          if (target >= 1 .and. target <= nfa%num_states .and. .not. visited(target)) then
            stack_top = stack_top + 1
            stack(stack_top) = target
          end if

        else if (trans%trans_type == TRANS_ANCHOR) then
          ! Check if anchor matches
          if (anchor_matches(trans%anchor_type, text, pos, text_len)) then
            target = trans%target
            if (target >= 1 .and. target <= nfa%num_states .and. .not. visited(target)) then
              stack_top = stack_top + 1
              stack(stack_top) = target
            end if
          end if
        end if
      end do
    end do

  end subroutine epsilon_closure

  subroutine step(nfa, state, c, pos, text, text_len, ignore_case, next_states, num_next)
    !> Take one step from state on character c
    type(nfa_t), intent(in) :: nfa
    integer, intent(in) :: state
    character(len=1), intent(in) :: c
    integer, intent(in) :: pos, text_len
    character(len=*), intent(in) :: text
    logical, intent(in) :: ignore_case
    integer, intent(inout) :: next_states(MAX_STATES)
    integer, intent(inout) :: num_next

    integer :: i
    type(nfa_transition_t) :: trans
    character(len=1) :: c_lower, match_lower

    do i = 1, nfa%states(state)%num_trans
      trans = nfa%states(state)%trans(i)

      select case (trans%trans_type)
        case (TRANS_CHAR)
          if (ignore_case) then
            c_lower = to_lower_char(c)
            match_lower = to_lower_char(trans%match_char)
            if (c_lower == match_lower) then
              call add_if_unique(next_states, num_next, trans%target)
            end if
          else
            if (c == trans%match_char) then
              call add_if_unique(next_states, num_next, trans%target)
            end if
          end if

        case (TRANS_CLASS)
          if (char_in_class(c, trans%char_class, trans%negated, ignore_case)) then
            call add_if_unique(next_states, num_next, trans%target)
          end if

        case (TRANS_ANY)
          ! Match any character except newline
          if (c /= char(10)) then
            call add_if_unique(next_states, num_next, trans%target)
          end if

      end select
    end do

  end subroutine step

  function is_accepting(nfa, states, num_states) result(res)
    !> Check if any state in the set is an accepting state
    type(nfa_t), intent(in) :: nfa
    integer, intent(in) :: states(MAX_STATES)
    integer, intent(in) :: num_states
    logical :: res

    integer :: i

    res = .false.
    do i = 1, num_states
      if (states(i) >= 1 .and. states(i) <= nfa%num_states) then
        if (nfa%states(states(i))%is_accept) then
          res = .true.
          return
        end if
      end if
    end do

  end function is_accepting

  function anchor_matches(anchor_type, text, pos, text_len) result(matches)
    !> Check if anchor matches at position
    integer, intent(in) :: anchor_type
    character(len=*), intent(in) :: text
    integer, intent(in) :: pos, text_len
    logical :: matches

    logical :: at_start, at_end, prev_word, curr_word

    matches = .false.

    at_start = (pos == 1) .or. (pos < 1)
    at_end = (pos > text_len)

    select case (anchor_type)
      case (1)  ! ^ start anchor
        if (at_start) then
          matches = .true.
        else if (pos > 1 .and. pos <= text_len + 1) then
          matches = (text(pos-1:pos-1) == char(10))
        end if

      case (2)  ! $ end anchor
        if (at_end) then
          matches = .true.
        else if (pos >= 1 .and. pos <= text_len) then
          matches = (text(pos:pos) == char(10))
        end if

      case (3)  ! \< word start
        prev_word = .false.
        curr_word = .false.
        if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
        if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
        matches = (.not. prev_word) .and. curr_word

      case (4)  ! \> word end
        prev_word = .false.
        curr_word = .false.
        if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
        if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
        matches = prev_word .and. (.not. curr_word)

      case (5)  ! \b word boundary
        prev_word = .false.
        curr_word = .false.
        if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
        if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
        matches = prev_word .neqv. curr_word

      case (6)  ! \B not word boundary
        prev_word = .false.
        curr_word = .false.
        if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
        if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
        matches = prev_word .eqv. curr_word
    end select

  end function anchor_matches

  pure function is_word_char(c) result(res)
    character(len=1), intent(in) :: c
    logical :: res
    integer :: ic

    ic = ichar(c)
    res = (ic >= ichar('a') .and. ic <= ichar('z')) .or. &
          (ic >= ichar('A') .and. ic <= ichar('Z')) .or. &
          (ic >= ichar('0') .and. ic <= ichar('9')) .or. &
          (c == '_')
  end function is_word_char

  pure function to_lower_char(c) result(lower)
    character(len=1), intent(in) :: c
    character(len=1) :: lower
    integer :: ic

    ic = ichar(c)
    if (ic >= ichar('A') .and. ic <= ichar('Z')) then
      ! ASCII uppercase A-Z -> a-z
      lower = char(ic + 32)
    else if (ic >= 192 .and. ic <= 214) then
      ! Latin-1 uppercase À-Ö (192-214) -> à-ö (224-246)
      lower = char(ic + 32)
    else if (ic >= 216 .and. ic <= 222) then
      ! Latin-1 uppercase Ø-Þ (216-222) -> ø-þ (248-254)
      lower = char(ic + 32)
    else if (ic >= 128 .and. ic <= 150) then
      ! UTF-8 continuation byte for uppercase Latin Extended-A (U+00C0-U+00D6)
      ! When preceded by 0xC3, these represent À-Ö, fold to à-ö
      lower = char(ic + 32)
    else if (ic >= 152 .and. ic <= 158) then
      ! UTF-8 continuation byte for uppercase Latin Extended-A (U+00D8-U+00DE)
      ! When preceded by 0xC3, these represent Ø-Þ, fold to ø-þ
      lower = char(ic + 32)
    else
      lower = c
    end if
  end function to_lower_char

  function char_in_class(c, char_class, negated, ignore_case) result(res)
    character(len=1), intent(in) :: c
    logical, intent(in) :: char_class(0:255)
    logical, intent(in) :: negated, ignore_case
    logical :: res

    integer :: ic
    character(len=1) :: c_lower, c_upper

    ic = ichar(c)
    res = char_class(ic)

    ! For case-insensitive, check both cases
    if (ignore_case .and. .not. res) then
      c_lower = to_lower_char(c)
      c_upper = to_upper_char(c)
      if (c_lower /= c) res = char_class(ichar(c_lower))
      if (.not. res .and. c_upper /= c) res = char_class(ichar(c_upper))
    end if

    if (negated) res = .not. res

  end function char_in_class

  pure function to_upper_char(c) result(upper)
    character(len=1), intent(in) :: c
    character(len=1) :: upper
    integer :: ic

    ic = ichar(c)
    if (ic >= ichar('a') .and. ic <= ichar('z')) then
      upper = char(ic - 32)
    else
      upper = c
    end if
  end function to_upper_char

  subroutine add_if_unique(arr, num, val)
    integer, intent(inout) :: arr(MAX_STATES)
    integer, intent(inout) :: num
    integer, intent(in) :: val

    integer :: i

    ! Check if already present
    do i = 1, num
      if (arr(i) == val) return
    end do

    ! Add if space available
    if (num < MAX_STATES) then
      num = num + 1
      arr(num) = val
    end if

  end subroutine add_if_unique

end module regex_engine