RnQ/for.RnQ/RTL/PerlRegEx.pas

{**************************************************************************************************}
{                                                                                                  }
{ Perl Regular Expressions VCL component                                                           }
{                                                                                                  }
{ The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"); }
{ you may not use this file except in compliance with the License. You may obtain a copy of the    }
{ License at http://www.mozilla.org/MPL/                                                           }
{                                                                                                  }
{ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF   }
{ ANY KIND, either express or implied. See the License for the specific language governing rights  }
{ and limitations under the License.                                                               }
{                                                                                                  }
{ The Original Code is PerlRegEx.pas.                                                              }
{                                                                                                  }
{ The Initial Developer of the Original Code is Jan Goyvaerts.                                     }
{ Portions created by Jan Goyvaerts are Copyright (C) 1999, 2005, 2008, 2010  Jan Goyvaerts.       }
{ All rights reserved.                                                                             }
{                                                                                                  }
{ Design & implementation, by Jan Goyvaerts, 1999, 2005, 2008, 2010                                }
{                                                                                                  }
{ TPerlRegEx is available at http://www.regular-expressions.info/delphi.html                       }
{                                                                                                  }
{**************************************************************************************************}

unit PerlRegEx;

interface

uses
  Windows, Messages, SysUtils, Classes, System.RegularExpressionsAPI;

type
  TPerlRegExOptions = set of (
    preCaseLess,       // /i -> Case insensitive
    preMultiLine,      // /m -> ^ and $ also match before/after a newline, not just at the beginning and the end of the string
    preSingleLine,     // /s -> Dot matches any character, including \n (newline). Otherwise, it matches anything except \n
    preExtended,       // /x -> Allow regex to contain extra whitespace, newlines and Perl-style comments, all of which will be filtered out
    preAnchored,       // /A -> Successful match can only occur at the start of the subject or right after the previous match
    preUnGreedy,       // Repeat operators (+, *, ?) are not greedy by default (i.e. they try to match the minimum number of characters instead of the maximum)
    preNoAutoCapture   // (group) is a non-capturing group; only named groups capture
  );

type
  TPerlRegExState = set of (
    preNotBOL,         // Not Beginning Of Line: ^ does not match at the start of Subject
    preNotEOL,         // Not End Of Line: $ does not match at the end of Subject
    preNotEmpty        // Empty matches not allowed
  );

const
  // Maximum number of subexpressions (backreferences)
  // Subexpressions are created by placing round brackets in the regex, and are referenced by \1, \2, ...
  // In Perl, they are available as $1, $2, ... after the regex matched; with TPerlRegEx, use the Subexpressions property
  // You can also insert \1, \2, ... in the replacement string; \0 is the complete matched expression
  MAX_SUBEXPRESSIONS = 99;

{$IFDEF UNICODE}
// All implicit string casts have been verified to be correct
{$WARN IMPLICIT_STRING_CAST OFF}
// Use UTF-8 in Delphi 2009 and later, so Unicode strings are handled correctly.
// PCRE does not support UTF-16
type
  PCREString = UTF8String;
{$ELSE UNICODE}
// Use AnsiString in Delphi 2007 and earlier
type
  PCREString = AnsiString;
{$ENDIF UNICODE}

type
  TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: PCREString) of object;

type
  TPerlRegEx = class
  private    // *** Property storage, getters and setters
    FCompiled, FStudied: Boolean;
    FOptions: TPerlRegExOptions;
    FState: TPerlRegExState;
    FRegEx, FReplacement, FSubject: PCREString;
    FStart, FStop: Integer;
    FOnMatch: TNotifyEvent;
    FOnReplace: TPerlRegExReplaceEvent;
    function GetMatchedText: PCREString;
    function GetMatchedLength: Integer;
    function GetMatchedOffset: Integer;
    procedure SetOptions(Value: TPerlRegExOptions);
    procedure SetRegEx(const Value: PCREString);
    function GetGroupCount: Integer;
    function GetGroups(Index: Integer): PCREString;
    function GetGroupLengths(Index: Integer): Integer;
    function GetGroupOffsets(Index: Integer): Integer;
    procedure SetSubject(const Value: PCREString);
    procedure SetStart(const Value: Integer);
    procedure SetStop(const Value: Integer);
    function GetFoundMatch: Boolean;
  private    // *** Variables used by PCRE
    Offsets: array[0..(MAX_SUBEXPRESSIONS+1)*3] of Integer;
    OffsetCount: Integer;
    pcreOptions: Integer;
    pattern, hints, chartable: Pointer;
    FSubjectPChar: PAnsiChar;
    FHasStoredGroups: Boolean;
    FStoredGroups: array of PCREString;
    function GetSubjectLeft: PCREString;
    function GetSubjectRight: PCREString;
  protected
    procedure CleanUp;
        // Dispose off whatever we created, so we can start over. Called automatically when needed, so it is not made public
    procedure ClearStoredGroups;
  public
    constructor Create;
        // Come to life
    destructor Destroy; override;
        // Clean up after ourselves
    class function EscapeRegExChars(const S: string): string;
        // Escapes regex characters in S so that the regex engine can be used to match S as plain text
    procedure Compile;
        // Compile the regex. Called automatically by Match
    procedure Study;
        // Study the regex. Studying takes time, but will make the execution of the regex a lot faster.
        // Call study if you will be using the same regex many times
    function Match: Boolean;
        // Attempt to match the regex, starting the attempt from the beginning of Subject
    function MatchAgain: Boolean;
        // Attempt to match the regex to the remainder of Subject after the previous match (as indicated by Start)
    function Replace: PCREString;
        // Replace matched expression in Subject with ComputeReplacement.  Returns the actual replacement text from ComputeReplacement
    function ReplaceAll: Boolean;
        // Repeat MatchAgain and Replace until you drop.  Returns True if anything was replaced at all.
    function ComputeReplacement: PCREString;
        // Returns Replacement with backreferences filled in
    procedure StoreGroups;
        // Stores duplicates of Groups[] so they and ComputeReplacement will still return the proper strings
        // even if FSubject is changed or cleared
    function NamedGroup(const Name: PCREString): Integer;
        // Returns the index of the named group Name
    procedure Split(Strings: TStrings; Limit: Integer);
        // Split Subject along regex matches.  Capturing groups are ignored.
    procedure SplitCapture(Strings: TStrings; Limit: Integer); overload;
    procedure SplitCapture(Strings: TStrings; Limit: Integer; Offset: Integer); overload;
        // Split Subject along regex matches.  Capturing groups are added to Strings as well.
    property Compiled: Boolean read FCompiled;
        // True if the RegEx has already been compiled.
    property FoundMatch: Boolean read GetFoundMatch;
        // Returns True when Matched* and Group* indicate a match
    property Studied: Boolean read FStudied;
        // True if the RegEx has already been studied
    property MatchedText: PCREString read GetMatchedText;
        // The matched text
    property MatchedLength: Integer read GetMatchedLength;
        // Length of the matched text
    property MatchedOffset: Integer read GetMatchedOffset;
        // Character offset in the Subject string at which MatchedText starts
    property Start: Integer read FStart write SetStart;
        // Starting position in Subject from which MatchAgain begins
    property Stop: Integer read FStop write SetStop;
        // Last character in Subject that Match and MatchAgain search through
    property State: TPerlRegExState read FState write FState;
        // State of Subject
    property GroupCount: Integer read GetGroupCount;
        // Number of matched capturing groups
    property Groups[Index: Integer]: PCREString read GetGroups;
        // Text matched by capturing groups
    property GroupLengths[Index: Integer]: Integer read GetGroupLengths;
        // Lengths of the text matched by capturing groups
    property GroupOffsets[Index: Integer]: Integer read GetGroupOffsets;
        // Character offsets in Subject at which the capturing group matches start
    property Subject: PCREString read FSubject write SetSubject;
        // The string on which Match() will try to match RegEx
    property SubjectLeft: PCREString read GetSubjectLeft;
        // Part of the subject to the left of the match
    property SubjectRight: PCREString read GetSubjectRight;
        // Part of the subject to the right of the match
  public
    property Options: TPerlRegExOptions read FOptions write SetOptions;
        // Options
    property RegEx: PCREString read FRegEx write SetRegEx;
        // The regular expression to be matched
    property Replacement: PCREString read FReplacement write FReplacement;
        // Text to replace matched expression with. \number and $number backreferences will be substituted with Groups
        // TPerlRegEx supports the "JGsoft" replacement text flavor as explained at http://www.regular-expressions.info/refreplace.html
    property OnMatch: TNotifyEvent read FOnMatch write FOnMatch;
        // Triggered by Match and MatchAgain after a successful match
    property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace;
        // Triggered by Replace and ReplaceAll just before the replacement is done, allowing you to determine the new PCREString
  end;

{
  You can add TPerlRegEx instances to a TPerlRegExList to match them all together on the same subject,
  as if they were one regex regex1|regex2|regex3|...
  TPerlRegExList does not own the TPerlRegEx components, just like a TList
  If a TPerlRegEx has been added to a TPerlRegExList, it should not be used in any other situation
  until it is removed from the list
}

type
  TPerlRegExList = class
  private
    FList: TList;
    FSubject: PCREString;
    FMatchedRegEx: TPerlRegEx;
    FStart, FStop: Integer;
    function GetRegEx(Index: Integer): TPerlRegEx;
    procedure SetRegEx(Index: Integer; Value: TPerlRegEx);
    procedure SetSubject(const Value: PCREString);
    procedure SetStart(const Value: Integer);
    procedure SetStop(const Value: Integer);
    function GetCount: Integer;
  protected
    procedure UpdateRegEx(ARegEx: TPerlRegEx);
  public
    constructor Create;
    destructor Destroy; override;
  public
    function Add(ARegEx: TPerlRegEx): Integer;
    procedure Clear;
    procedure Delete(Index: Integer);
    function IndexOf(ARegEx: TPerlRegEx): Integer;
    procedure Insert(Index: Integer; ARegEx: TPerlRegEx);
  public
    function Match: Boolean;
    function MatchAgain: Boolean;
    property RegEx[Index: Integer]: TPerlRegEx read GetRegEx write SetRegEx;
    property Count: Integer read GetCount;
    property Subject: PCREString read FSubject write SetSubject;
    property Start: Integer read FStart write SetStart;
    property Stop: Integer read FStop write SetStop;
    property MatchedRegEx: TPerlRegEx read FMatchedRegEx;
  end;

implementation


         { ********* Unit support routines ********* }

function FirstCap(const S: string): string;
begin
  if S = '' then Result := ''
  else begin
    Result := AnsiLowerCase(S);
  {$IFDEF UNICODE}
    CharUpperBuffW(@Result[1], 1);
  {$ELSE}
    CharUpperBuffA(@Result[1], 1);
  {$ENDIF}
  end
end;

function InitialCaps(const S: string): string;
var
  I: Integer;
  Up: Boolean;
begin
  Result := AnsiLowerCase(S);
  Up := True;
{$IFDEF UNICODE}
  for I := 1 to Length(Result) do begin
    case Result[I] of
      #0..'&', '(', '*', '+', ',', '-', '.', '?', '<', '[', '{', #$00B7:
        Up := True
      else
        if Up and (Result[I] <> '''') then begin
          CharUpperBuffW(@Result[I], 1);
          Up := False
        end
    end;
  end;
{$ELSE UNICODE}
  if SysLocale.FarEast then begin
    I := 1;
    while I <= Length(Result) do begin
      if Result[I] in LeadBytes then begin
        Inc(I, 2)
      end
      else begin
        if Result[I] in [#0..'&', '('..'.', '?', '<', '[', '{'] then Up := True
        else if Up and (Result[I] <> '''') then begin
          CharUpperBuffA(@Result[I], 1);
          Result[I] := UpperCase(Result[I])[1];
          Up := False
        end;
        Inc(I)
      end
    end
  end
  else
    for I := 1 to Length(Result) do begin
      if Result[I] in [#0..'&', '('..'.', '?', '<', '[', '{', #$B7] then Up := True
      else if Up and (Result[I] <> '''') then begin
        CharUpperBuffA(@Result[I], 1);
        Result[I] := AnsiUpperCase(Result[I])[1];
        Up := False
      end
    end;
{$ENDIF UNICODE}
end;


         { ********* TPerlRegEx component ********* }

procedure TPerlRegEx.CleanUp;
begin
  FCompiled := False; FStudied := False;
  pcre_dispose(pattern, hints, nil);
  pattern := nil;
  hints := nil;
  ClearStoredGroups;
  OffsetCount := 0;
end;

procedure TPerlRegEx.ClearStoredGroups;
begin
  FHasStoredGroups := False;
  FStoredGroups := nil;
end;

procedure TPerlRegEx.Compile;
var
  Error: PAnsiChar;
  ErrorOffset: Integer;
begin
  if FRegEx = '' then
    raise Exception.Create('TPerlRegEx.Compile() - Please specify a regular expression in RegEx first');
  CleanUp;
  Pattern := pcre_compile(PAnsiChar(FRegEx), pcreOptions, @Error, @ErrorOffset, chartable);
  if Pattern = nil then
    raise Exception.Create(Format('TPerlRegEx.Compile() - Error in regex at offset %d: %s', [ErrorOffset, AnsiString(Error)]));
  FCompiled := True
end;

(* Backreference overview:

Assume there are 13 backreferences:

Text        TPerlRegex    .NET      Java       ECMAScript
$17         $1 + "7"      "$17"     $1 + "7"   $1 + "7"
$017        $1 + "7"      "$017"    $1 + "7"   $1 + "7"
$12         $12           $12       $12        $12
$012        $1 + "2"      $12       $12        $1 + "2"
${1}2       $1 + "2"      $1 + "2"  error      "${1}2"
$$          "$"           "$"       error      "$"
\$          "$"           "\$"      "$"        "\$"
*)

function TPerlRegEx.ComputeReplacement: PCREString;
var
  Mode: AnsiChar;
  S: PCREString;
  I, J, N: Integer;

  procedure ReplaceBackreference(Number: Integer);
  var
    Backreference: PCREString;
  begin
    Delete(S, I, J-I);
    if Number <= GroupCount then begin
      Backreference := Groups[Number];
      if Backreference <> '' then begin
        // Ignore warnings; converting to UTF-8 does not cause data loss
        case Mode of
          'L', 'l': Backreference := AnsiLowerCase(Backreference);
          'U', 'u': Backreference := AnsiUpperCase(Backreference);
          'F', 'f': Backreference := FirstCap(Backreference);
          'I', 'i': Backreference := InitialCaps(Backreference);
        end;
        if S <> '' then begin
          Insert(Backreference, S, I);
          I := I + Length(Backreference);
        end
        else begin
          S := Backreference;
          I := MaxInt;
        end
      end;
    end
  end;

  procedure ProcessBackreference(NumberOnly, Dollar: Boolean);
  var
    Number, Number2: Integer;
    Group: PCREString;
  begin
    Number := -1;
    if (J <= Length(S)) and (S[J] in ['0'..'9']) then begin
      // Get the number of the backreference
      Number := Ord(S[J]) - Ord('0');
      Inc(J);
      if (J <= Length(S)) and (S[J] in ['0'..'9']) then begin
        // Expand it to two digits only if that would lead to a valid backreference
        Number2 := Number*10 + Ord(S[J]) - Ord('0');
        if Number2 <= GroupCount then begin
          Number := Number2;
          Inc(J)
        end;
      end;
    end
    else if not NumberOnly then begin
      if Dollar and (J < Length(S)) and (S[J] = '{') then begin
        // Number or name in curly braces
        Inc(J);
        case S[J] of
          '0'..'9': begin
            Number := Ord(S[J]) - Ord('0');
            Inc(J);
            while (J <= Length(S)) and (S[J] in ['0'..'9']) do begin
              Number := Number*10 + Ord(S[J]) - Ord('0');
              Inc(J)
            end;
          end;
          'A'..'Z', 'a'..'z', '_': begin
            Inc(J);
            while (J <= Length(S)) and (S[J] in ['A'..'Z', 'a'..'z', '0'..'9', '_']) do Inc(J);
            if (J <= Length(S)) and (S[J] = '}') then begin
              Group := Copy(S, I+2, J-I-2);
              Number := NamedGroup(Group);
            end
          end;
        end;
        if (J > Length(S)) or (S[J] <> '}') then Number := -1
          else Inc(J)
      end
      else if Dollar and (S[J] = '_') then begin
        // $_ (whole subject)
        Delete(S, I, J+1-I);
        Insert(Subject, S, I);
        I := I + Length(Subject);
        Exit;
      end
      else case S[J] of
        '&': begin
          // \& or $& (whole regex match)
          Number := 0;
          Inc(J);
        end;
        '+': begin
          // \+ or $+ (highest-numbered participating group)
          Number := GroupCount;
          Inc(J);
        end;
        '`': begin
          // \` or $` (backtick; subject to the left of the match)
          Delete(S, I, J+1-I);
          Insert(SubjectLeft, S, I);
          I := I + Offsets[0] - 1;
          Exit;
        end;
        '''': begin
          // \' or $' (straight quote; subject to the right of the match)
          Delete(S, I, J+1-I);
          Insert(SubjectRight, S, I);
          I := I + Length(Subject) - Offsets[1];
          Exit;
        end
      end;
    end;
    if Number >= 0 then ReplaceBackreference(Number)
      else Inc(I)
  end;

begin
  S := FReplacement;
  I := 1;
  while I < Length(S) do begin
    case S[I] of
      '\': begin
        J := I + 1;
        Assert(J <= Length(S), 'CHECK: We let I stop one character before the end, so J cannot point beyond the end of the PCREString here');
        case S[J] of
          '$', '\': begin
            Delete(S, I, 1);
            Inc(I);
          end;
          'g': begin
            if (J < Length(S)-1) and (S[J+1] = '<') and (S[J+2] in ['A'..'Z', 'a'..'z', '_']) then begin
              // Python-style named group reference \g
              J := J+3;
              while (J <= Length(S)) and (S[J] in ['0'..'9', 'A'..'Z', 'a'..'z', '_']) do Inc(J);
              if (J <= Length(S)) and (S[J] = '>') then begin
                N := NamedGroup(Copy(S, I+3, J-I-3));
                Inc(J);
                Mode := #0;
                if N > 0 then ReplaceBackreference(N)
                  else Delete(S, I, J-I)
              end
              else I := J
            end
            else I := I+2;
          end;
          'l', 'L', 'u', 'U', 'f', 'F', 'i', 'I': begin
            Mode := S[J];
            Inc(J);
            ProcessBackreference(True, False);
          end;
          else begin
            Mode := #0;
            ProcessBackreference(False, False);
          end;
        end;
      end;
      '$': begin
        J := I + 1;
        Assert(J <= Length(S), 'CHECK: We let I stop one character before the end, so J cannot point beyond the end of the PCREString here');
        if S[J] = '$' then begin
          Delete(S, J, 1);
          Inc(I);
        end
        else begin
          Mode := #0;
          ProcessBackreference(False, True);
        end
      end;
      else Inc(I)
    end
  end;
  Result := S
end;

constructor TPerlRegEx.Create;
begin
  inherited Create;
  FState := [preNotEmpty];
  chartable := pcre_maketables;
{$IFDEF UNICODE}
  pcreOptions := PCRE_UTF8 or PCRE_NEWLINE_ANY;
{$ELSE}
  pcreOptions := PCRE_NEWLINE_ANY;
{$ENDIF}
end;

destructor TPerlRegEx.Destroy;
begin
  pcre_dispose(pattern, hints, chartable);
  inherited Destroy;
end;

class function TPerlRegEx.EscapeRegExChars(const S: string): string;
var
  I: Integer;
begin
  Result := S;
  I := Length(Result);
  while I > 0 do begin
    case Result[I] of
      '.', '[', ']', '(', ')', '?', '*', '+', '{', '}', '^', '$', '|', '\':
        Insert('\', Result, I);
      #0: begin
        Result[I] := '0';
        Insert('\', Result, I);
      end;
    end;
    Dec(I);
  end;
end;

function TPerlRegEx.GetFoundMatch: Boolean;
begin
  Result := OffsetCount > 0;
end;

function TPerlRegEx.GetMatchedText: PCREString;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  Result := GetGroups(0);
end;

function TPerlRegEx.GetMatchedLength: Integer;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  Result := GetGroupLengths(0)
end;

function TPerlRegEx.GetMatchedOffset: Integer;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  Result := GetGroupOffsets(0)
end;

function TPerlRegEx.GetGroupCount: Integer;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  Result := OffsetCount-1
end;

function TPerlRegEx.GetGroupLengths(Index: Integer): Integer;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  Assert((Index >= 0) and (Index <= GroupCount), 'REQUIRE: Index <= GroupCount');
  Result := Offsets[Index*2+1]-Offsets[Index*2]
end;

function TPerlRegEx.GetGroupOffsets(Index: Integer): Integer;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  Assert((Index >= 0) and (Index <= GroupCount), 'REQUIRE: Index <= GroupCount');
  Result := Offsets[Index*2]
end;

function TPerlRegEx.GetGroups(Index: Integer): PCREString;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  if Index > GroupCount then Result := ''
    else if FHasStoredGroups then Result := FStoredGroups[Index]
    else Result := Copy(FSubject, Offsets[Index*2], Offsets[Index*2+1]-Offsets[Index*2]);
end;

function TPerlRegEx.GetSubjectLeft: PCREString;
begin
  Result := Copy(Subject, 1, Offsets[0]-1);
end;

function TPerlRegEx.GetSubjectRight: PCREString;
begin
  Result := Copy(Subject, Offsets[1], MaxInt);
end;

function TPerlRegEx.Match: Boolean;
var
  I, Opts: Integer;
begin
  ClearStoredGroups;
  if not Compiled then Compile;
  if preNotBOL in State then Opts := PCRE_NOTBOL else Opts := 0;
  if preNotEOL in State then Opts := Opts or PCRE_NOTEOL;
  if preNotEmpty in State then Opts := Opts or PCRE_NOTEMPTY;
  OffsetCount := pcre_exec(Pattern, Hints, FSubjectPChar, FStop, 0, Opts, @Offsets[0], High(Offsets));
  Result := OffsetCount > 0;
  // Convert offsets into PCREString indices
  if Result then begin
    for I := 0 to OffsetCount*2-1 do
      Inc(Offsets[I]);
    FStart := Offsets[1];
    if Offsets[0] = Offsets[1] then Inc(FStart); // Make sure we don't get stuck at the same position
    if Assigned(OnMatch) then OnMatch(Self)
  end;
end;

function TPerlRegEx.MatchAgain: Boolean;
var
  I, Opts: Integer;
begin
  ClearStoredGroups;
  if not Compiled then Compile;
  if preNotBOL in State then Opts := PCRE_NOTBOL else Opts := 0;
  if preNotEOL in State then Opts := Opts or PCRE_NOTEOL;
  if preNotEmpty in State then Opts := Opts or PCRE_NOTEMPTY;
  if FStart-1 > FStop then OffsetCount := -1
    else OffsetCount := pcre_exec(Pattern, Hints, FSubjectPChar, FStop, FStart-1, Opts, @Offsets[0], High(Offsets));
  Result := OffsetCount > 0;
  // Convert offsets into PCREString indices
  if Result then begin
    for I := 0 to OffsetCount*2-1 do
      Inc(Offsets[I]);
    FStart := Offsets[1];
    if Offsets[0] = Offsets[1] then Inc(FStart); // Make sure we don't get stuck at the same position
    if Assigned(OnMatch) then OnMatch(Self)
  end;
end;

function TPerlRegEx.NamedGroup(const Name: PCREString): Integer;
begin
  Result := pcre_get_stringnumber(Pattern, PAnsiChar(Name));
end;

function TPerlRegEx.Replace: PCREString;
begin
  Assert(FoundMatch, 'REQUIRE: There must be a successful match first');
  // Substitute backreferences
  Result := ComputeReplacement;
  // Allow for just-in-time substitution determination
  if Assigned(OnReplace) then OnReplace(Self, Result);
  // Perform substitution
  Delete(FSubject, MatchedOffset, MatchedLength);
  if Result <> '' then Insert(Result, FSubject, MatchedOffset);
  FSubjectPChar := PAnsiChar(FSubject);
  // Position to continue search
  FStart := FStart - MatchedLength + Length(Result);
  FStop := FStop - MatchedLength + Length(Result);
  // Replacement no longer matches regex, we assume
  ClearStoredGroups;
  OffsetCount := 0;
end;

function TPerlRegEx.ReplaceAll: Boolean;
begin
  if Match then begin
    Result := True;
    repeat
      Replace
    until not MatchAgain;
  end
  else Result := False;
end;

procedure TPerlRegEx.SetOptions(Value: TPerlRegExOptions);
begin
  if (FOptions <> Value) then begin
    FOptions := Value;
  {$IFDEF UNICODE}
    pcreOptions := PCRE_UTF8 or PCRE_NEWLINE_ANY;
  {$ELSE}
    pcreOptions := PCRE_NEWLINE_ANY;
  {$ENDIF}
    if (preCaseLess in Value) then pcreOptions := pcreOptions or PCRE_CASELESS;
    if (preMultiLine in Value) then pcreOptions := pcreOptions or PCRE_MULTILINE;
    if (preSingleLine in Value) then pcreOptions := pcreOptions or PCRE_DOTALL;
    if (preExtended in Value) then pcreOptions := pcreOptions or PCRE_EXTENDED;
    if (preAnchored in Value) then pcreOptions := pcreOptions or PCRE_ANCHORED;
    if (preUnGreedy in Value) then pcreOptions := pcreOptions or PCRE_UNGREEDY;
    if (preNoAutoCapture in Value) then pcreOptions := pcreOptions or PCRE_NO_AUTO_CAPTURE;
    CleanUp
  end
end;

procedure TPerlRegEx.SetRegEx(const Value: PCREString);
begin
  if FRegEx <> Value then begin
    FRegEx := Value;
    CleanUp
  end
end;

procedure TPerlRegEx.SetStart(const Value: Integer);
begin
  if Value < 1 then FStart := 1
  else FStart := Value;
  // If FStart > Length(Subject), MatchAgain() will simply return False
end;

procedure TPerlRegEx.SetStop(const Value: Integer);
begin
  if Value > Length(Subject) then FStop := Length(Subject)
    else FStop := Value;
end;

procedure TPerlRegEx.SetSubject(const Value: PCREString);
begin
  FSubject := Value;
  FSubjectPChar := PAnsiChar(Value);
  FStart := 1;
  FStop := Length(Subject);
  if not FHasStoredGroups then OffsetCount := 0;
end;

procedure TPerlRegEx.Split(Strings: TStrings; Limit: Integer);
var
  Offset, Count: Integer;
begin
  Assert(Strings <> nil, 'REQUIRE: Strings');
  if (Limit = 1) or not Match then Strings.Add(Subject)
  else begin
    Offset := 1;
    Count := 1;
    repeat
      Strings.Add(Copy(Subject, Offset, MatchedOffset - Offset));
      Inc(Count);
      Offset := MatchedOffset + MatchedLength;
    until ((Limit > 1) and (Count >= Limit)) or not MatchAgain;
    Strings.Add(Copy(Subject, Offset, MaxInt));
  end
end;

procedure TPerlRegEx.SplitCapture(Strings: TStrings; Limit, Offset: Integer);
var
  Count: Integer;
  bUseOffset : boolean;
  iOffset : integer;
begin
  Assert(Strings <> nil, 'REQUIRE: Strings');
  if (Limit = 1) or not Match then Strings.Add(Subject)
  else
  begin
    bUseOffset := Offset <> 1;
    if Offset <> 1 then
      Dec(Limit);
    iOffset := 1;
    Count := 1;
    repeat
      if bUseOffset then
      begin
        if MatchedOffset >= Offset then
        begin
          bUseOffset := False;
          Strings.Add(Copy(Subject, 1, MatchedOffset -1));
          if Self.GroupCount > 0 then
            Strings.Add(Self.Groups[Self.GroupCount]);
        end;
      end
      else
      begin
        Strings.Add(Copy(Subject, iOffset, MatchedOffset - iOffset));
        Inc(Count);
        if Self.GroupCount > 0 then
          Strings.Add(Self.Groups[Self.GroupCount]);
      end;
      iOffset := MatchedOffset + MatchedLength;
    until ((Limit > 1) and (Count >= Limit)) or not MatchAgain;
    Strings.Add(Copy(Subject, iOffset, MaxInt));
  end
end;

procedure TPerlRegEx.SplitCapture(Strings: TStrings; Limit: Integer);
begin
  SplitCapture(Strings,Limit,1);
end;

procedure TPerlRegEx.StoreGroups;
var
  I: Integer;
begin
  if OffsetCount > 0 then begin
    ClearStoredGroups;
    SetLength(FStoredGroups, GroupCount+1);
    for I := GroupCount downto 0 do
      FStoredGroups[I] := Groups[I];
    FHasStoredGroups := True;
  end
end;

procedure TPerlRegEx.Study;
var
  Error: PAnsiChar;
begin
  if not FCompiled then Compile;
  Hints := pcre_study(Pattern, 0, @Error);
  if Error <> nil then
    raise Exception.Create('TPerlRegEx.Study() - Error studying the regex: ' + AnsiString(Error));
  FStudied := True
end;

{ TPerlRegExList }

function TPerlRegExList.Add(ARegEx: TPerlRegEx): Integer;
begin
  Result := FList.Add(ARegEx);
  UpdateRegEx(ARegEx);
end;

procedure TPerlRegExList.Clear;
begin
  FList.Clear;
end;

constructor TPerlRegExList.Create;
begin
  inherited Create;
  FList := TList.Create;
end;

procedure TPerlRegExList.Delete(Index: Integer);
begin
  FList.Delete(Index);
end;

destructor TPerlRegExList.Destroy;
begin
  FList.Free;
  inherited
end;

function TPerlRegExList.GetCount: Integer;
begin
  Result := FList.Count;
end;

function TPerlRegExList.GetRegEx(Index: Integer): TPerlRegEx;
begin
  Result := TPerlRegEx(Pointer(FList[Index]));
end;

function TPerlRegExList.IndexOf(ARegEx: TPerlRegEx): Integer;
begin
  Result := FList.IndexOf(ARegEx);
end;

procedure TPerlRegExList.Insert(Index: Integer; ARegEx: TPerlRegEx);
begin
  FList.Insert(Index, ARegEx);
  UpdateRegEx(ARegEx);
end;

function TPerlRegExList.Match: Boolean;
begin
  SetStart(1);
  FMatchedRegEx := nil;
  Result := MatchAgain;
end;

function TPerlRegExList.MatchAgain: Boolean;
var
  I, MatchStart, MatchPos: Integer;
  ARegEx: TPerlRegEx;
begin
  if FMatchedRegEx <> nil then
    MatchStart := FMatchedRegEx.MatchedOffset + FMatchedRegEx.MatchedLength
  else
    MatchStart := FStart;
  FMatchedRegEx := nil;
  MatchPos := MaxInt;
  for I := 0 to Count-1 do begin
    ARegEx := RegEx[I];
    if (not ARegEx.FoundMatch) or (ARegEx.MatchedOffset < MatchStart) then begin
      ARegEx.Start := MatchStart;
      ARegEx.MatchAgain;
    end;
    if ARegEx.FoundMatch and (ARegEx.MatchedOffset < MatchPos) then begin
      MatchPos := ARegEx.MatchedOffset;
      FMatchedRegEx := ARegEx;
    end;
    if MatchPos = MatchStart then Break;
  end;
  Result := MatchPos < MaxInt;
end;

procedure TPerlRegExList.SetRegEx(Index: Integer; Value: TPerlRegEx);
begin
  FList[Index] := Value;
  UpdateRegEx(Value);
end;

procedure TPerlRegExList.SetStart(const Value: Integer);
var
  I: Integer;
begin
  if FStart <> Value then begin
    FStart := Value;
    for I := Count-1 downto 0 do
      RegEx[I].Start := Value;
    FMatchedRegEx := nil;
  end;
end;

procedure TPerlRegExList.SetStop(const Value: Integer);
var
  I: Integer;
begin
  if FStop <> Value then begin
    FStop := Value;
    for I := Count-1 downto 0 do
      RegEx[I].Stop := Value;
    FMatchedRegEx := nil;
  end;
end;

procedure TPerlRegExList.SetSubject(const Value: PCREString);
var
  I: Integer;
begin
  if FSubject <> Value then begin
    FSubject := Value;
    for I := Count-1 downto 0 do
      RegEx[I].Subject := Value;
    FMatchedRegEx := nil;
  end;
end;

procedure TPerlRegExList.UpdateRegEx(ARegEx: TPerlRegEx);
begin
  ARegEx.Subject := FSubject;
  ARegEx.Start := FStart;
end;

end.