Next: , Previous: Strings, Up: GPC Units



6.15.16 Higher level string handling

The following listing contains the interface of the StringUtils unit.

This unit provides some routines for string handling on a higher level than those provided by the RTS.

     { Some routines for string handling on a higher level than those
       provided by the RTS.
     
       Copyright (C) 1999-2005 Free Software Foundation, Inc.
     
       Author: Frank Heckenbach <frank@pascal.gnu.de>
     
       This file is part of GNU Pascal.
     
       GNU Pascal is free software; you can redistribute it and/or modify
       it under the terms of the GNU General Public License as published
       by the Free Software Foundation; either version 2, or (at your
       option) any later version.
     
       GNU Pascal is distributed in the hope that it will be useful, but
       WITHOUT ANY WARRANTY; without even the implied warranty of
       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
       General Public License for more details.
     
       You should have received a copy of the GNU General Public License
       along with GNU Pascal; see the file COPYING. If not, write to the
       Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
       02111-1307, USA.
     
       As a special exception, if you link this file with files compiled
       with a GNU compiler to produce an executable, this does not cause
       the resulting executable to be covered by the GNU General Public
       License. This exception does not however invalidate any other
       reasons why the executable file might be covered by the GNU
       General Public License. }
     
     {$gnu-pascal,I-}
     {$if __GPC_RELEASE__ < 20030303}
     {$error This unit requires GPC release 20030303 or newer.}
     {$endif}
     
     unit StringUtils;
     
     interface
     
     uses GPC;
     
     { Various routines }
     
     { Appends Source to s, truncating the result if necessary. }
     procedure AppendStr (var s: String; const Source: String);
     
     { Cuts s to MaxLength characters. If s is already MaxLength
       characters or shorter, it doesn't change anything. }
     procedure StrCut (var s: String; MaxLength: Integer);
     
     { Returns the number of disjoint occurences of SubStr in s. Returns
       0 if SubStr is empty. }
     function  StrCount (const SubStr: String; s: String): Integer;
     
     { Returns s, with all disjoint occurences of Source replaced by
       Dest. }
     function  StrReplace (const s, Source, Dest: String) = Result:
       TString;
     
     { Sets of characters accepted for True and False by
       Char2Boolean and StrReadBoolean. }
     var
       CharactersTrue : CharSet = ['Y', 'y'];
       CharactersFalse: CharSet = ['N', 'n'];
     
     { If ch is an element of CharactersTrue, Dest is set to True,
       otherwise if it is an element of CharactersFalse, Dest is set to
       False. In both cases True is returned. If ch is not an element of
       either set, Dest is set to False and False is returned. }
     function  Char2Boolean (ch: Char; var Dest: Boolean): Boolean;
       attribute (ignorable);
     
     { Converts a digit character to its numeric value. Handles every
       base up to 36 (0 .. 9, a .. z, upper and lower case recognized).
       Returns -1 if the character is not a digit at all. If you want to
       use it for a base < 36, you have to check if the result is smaller
       than the base and not equal to -1. }
     function  Char2Digit (ch: Char): Integer;
     
     { Encode a string in a printable format (quoted printable). All
       occurences of EscapeChar within the string are encoded. If
       QuoteHigh is True, all characters above the ASCII range are
       encoded as well (required in "7 bit" environments, as per several
       RFCs). = is always encoded, as required for proper decoding, as
       are all characters below space (control characters), so if you
       don't need an escape char yourself, you can pass #0 for
       EscapeChar. }
     function  QuoteStringEscape (const s: String; EscapeChar: Char;
       QuoteHigh: Boolean): TString;
     
     { Encode a string in a printable format (quoted printable and
       surrounded with "). All occurences of " within the string are
       encoded, so the result string contains exactly two " characters
       (at the beginning and ending). This is useful to store arbitrary
       strings in text files while keeping them as readable as possible
       (which is the goal of the quoted printable encoding in general,
       see RFC 1521, section 5.1) and being able to read them back
       losslessly (with UnQuoteString). }
     function  QuoteString (const s: String): TString;
     
     { Encode a string in a printable format suitable for StrReadEnum.
       All occurences of , within the string are encoded. }
     function  QuoteEnum (const s: String): TString;
     
     { Decode a string encoded by QuoteString (removing the " and
       expanding quoted printable encoded characters). Returns True if
       successful and False if the string has an invalid form. A string
       returned by QuoteString is always valid. }
     function  UnQuoteString (var s: String): Boolean; attribute
       (ignorable);
     
     { Decode a quoted-printable string (not enclosed in ", unlike for
       UnQuoteString). Returns True if successful and False if the string
       has an invalid form. In the latter case, it still decodes as much
       as is valid, even after the error position. }
     function  UnQPString (var s: String): Boolean; attribute
       (ignorable);
     
     { Quotes a string as done in shells, i.e. all special characters are
       enclosed in either " or ', where ", $ and ` are always
       enclosed in ' and ' is always enclosed in ". }
     function  ShellQuoteString (const s: String) = Res: TString;
     
     { Replaces all tab characters in s with the appropriate amount of
       spaces, assuming tab stops at every TabSize columns. Returns True
       if successful and False if the expanded string would exceed the
       capacity of s. In the latter case, some, but not all of the tabs
       in s may have been expanded. }
     function  ExpandTabs (var s: String; TabSize: Integer): Boolean;
       attribute (ignorable);
     
     { Returns s, with all occurences of C style escape sequences (e.g.
       \n) replaced by the characters they mean. If AllowOctal is True,
       also octal character specifications (e.g. \007) are replaced. If
       RemoveQuoteChars is True, any other backslashes are removed (e.g.
       \* -> * and \\ -> \), otherwise they are kept, and also
       \\ is left as two backslashes then. }
     function  ExpandCEscapeSequences (const s: String; RemoveQuoteChars,
       AllowOctal: Boolean) = r: TString;
     
     { Routines for TPStrings }
     
     { Initialise a TPStrings variable, allocate Size characters for each
       element. This procedure does not dispose of any previously
       allocated storage, so if you use it on a previously used variable
       without freeing the storage yourself, this might cause memory
       leaks. }
     procedure AllocateTPStrings (var Strings: TPStrings; Size: Integer);
     
     { Clear all elements (set them to empty strings), does not free any
       storage. }
     procedure ClearTPStrings (var Strings: TPStrings);
     
     { Divide a string into substrings, using Separators as separator. A
       single trailing separator is ignored. Further trailing separators
       as well as any leading separators and multiple separators in a row
       produce empty substrings. }
     function TokenizeString (const Source: String; Separators: CharSet)
       = Res: PPStrings;
     
     { Divide a string into substrings, using SpaceCharacters as
       separators. The splitting is done according the usual rules of
       shells, using (and removing) single and double quotes and
       QuotingCharacter. Multiple, leading, and trailing separators are
       ignored. If there is an error, a message is stored in ErrMsg (if
       not Null), and nil is returned. nil is also returned (without an
       error message) if s is empty. }
     function ShellTokenizeString (const s: String; var ErrMsg: String) =
       Tokens: PPStrings;
     
     { String parsing routines }
     
     { All the following StrReadFoo functions behave similarly. They read
       items from a string s, starting at index i, to a variable Dest.
       They skip any space characters (spaces and tabs) by incrementing i
       first. They return True if successful, False otherwise. i is
       incremented accordingly if successful, otherwise i is left
       unchanged, apart from the skipping of space characters, and Dest
       is undefined. This behaviour makes it easy to use the functions in
       a row like this:
     
         i := 1;
         if StrReadInt    (s, i, Size)  and StrReadComma (s, i) and
            StrReadQuoted (s, i, Name)  and StrReadComma (s, i) and
            ...
            StrReadReal   (s, i, Angle) and (i > Length (s)) then ...
     
       (The check i > Length (s) is in case you don't want to accept
       trailing "garbage".) }
     
     { Just skip any space characters as described above. }
     procedure StrSkipSpaces (const s: String; var i: Integer);
     
     { Read a quoted string (as produced by QuoteString) from a string
       and unquote the result using UnQuoteString. It is considered
       failure if the result (unquoted) would be longer than the capacity
       of Dest. }
     function  StrReadQuoted (const s: String; var i: Integer; var Dest:
       String): Boolean; attribute (ignorable);
     
     { Read a string delimited with Delimiter from a string and return
       the result with the delimiters removed. It is considered failure
       if the result (without delimiters) would be longer than the
       capacity of Dest. }
     function  StrReadDelimited (const s: String; var i: Integer; var
       Dest: String; Delimiter: Char): Boolean; attribute (ignorable);
     
     { Read a word (consisting of anything but space characters and
       commas) from a string. It is considered failure if the result
       would be longer than the capacity of Dest. }
     function  StrReadWord (const s: String; var i: Integer; var Dest:
       String): Boolean; attribute (ignorable);
     
     { Check that a certain string is contained in s (after possible
       space characters). }
     function  StrReadConst (const s: String; var i: Integer; const
       Expected: String) = Res: Boolean; attribute (ignorable);
     
     { A simpler to use version of StrReadConst that expects a ,. }
     function  StrReadComma (const s: String; var i: Integer) = Res:
       Boolean; attribute (ignorable);
     
     { Read an integer number from a string. }
     function  StrReadInt (const s: String; var i: Integer; var Dest:
       Integer): Boolean; attribute (ignorable);
     
     { Read a real number from a string. }
     function  StrReadReal (const s: String; var i: Integer; var Dest:
       Real): Boolean; attribute (ignorable);
     
     { Read a Boolean value, represented by a single character
       from CharactersTrue or CharactersFalse (cf. Char2Boolean), from a
       string. }
     function  StrReadBoolean (const s: String; var i: Integer; var Dest:
       Boolean): Boolean; attribute (ignorable);
     
     { Read an enumerated value, i.e., one of the entries of IDs, from a
       string, and stores the ordinal value, i.e., the index in IDs
       (always zero-based) in Dest. }
     function  StrReadEnum (const s: String; var i: Integer; var Dest:
       Integer; const IDs: array of PString): Boolean; attribute
       (ignorable);
     
     { String hash table }
     
     const
       DefaultHashSize = 1403;
     
     type
       THash = Cardinal;
     
       PStrHashList = ^TStrHashList;
       TStrHashList = record
         Next: PStrHashList;
         s: PString;
         i: Integer;
         p: Pointer
       end;
     
       PStrHashTable = ^TStrHashTable;
       TStrHashTable (Size: Cardinal) = record
         CaseSensitive: Boolean;
         Table: array [0 .. Size - 1] of PStrHashList
       end;
     
     function  HashString          (const s: String): THash;
     function  NewStrHashTable     (Size: Cardinal; CaseSensitive:
       Boolean) = HashTable: PStrHashTable;
     procedure AddStrHashTable     (HashTable: PStrHashTable; s: String;
       i: Integer; p: Pointer);
     procedure DeleteStrHashTable  (HashTable: PStrHashTable; s: String);
     function  SearchStrHashTable  (HashTable: PStrHashTable; const s:
       String; var p: Pointer): Integer;  { p may be Null }
     procedure StrHashTableUsage   (HashTable: PStrHashTable; var
       Entries, Slots: Integer);
     procedure DisposeStrHashTable (HashTable: PStrHashTable);