diff --git a/include/lgi/common/StringClass.h b/include/lgi/common/StringClass.h --- a/include/lgi/common/StringClass.h +++ b/include/lgi/common/StringClass.h @@ -1,1240 +1,1248 @@ /* * A mis-guided attempt to make a string class look and feel like a python string. * * Author: Matthew Allen * Email: fret@memecode.com * Created: 16 Sept 2014 */ #pragma once #include #include #include #if defined(_MSC_VER) || defined(__GTK_H__) // This fixes compile errors in VS2008/Gtk #undef _SIGN_DEFINED #undef abs #endif #include #if defined(_MSC_VER) && _MSC_VER < 1800/*_MSC_VER_VS2013*/ #include #define PRId64 "I64i" #else #define __STDC_FORMAT_MACROS 1 #include #include #ifndef PRId64 #warning "PRId64 not defined." #define PRId64 "Ld" #endif #endif #include "LgiOsDefs.h" #include "lgi/common/Unicode.h" #include "lgi/common/Array.h" #ifndef IsDigit #define IsDigit(ch) ((ch) >= '0' && (ch) <= '9') #endif LgiExtern int LPrintf(class LString &Str, const char *Format, va_list &Arg); /// A pythonic string class. class LString { protected: /// This structure holds the string's data itself and is shared /// between one or more LString instances. struct RefStr { /// A reference count int32 Refs; /// The bytes in 'Str' not including the NULL terminator size_t Len; /// The first byte of the string. Further bytes are allocated /// off the end of the structure using malloc. This must always /// be the last element in the struct. char Str[1]; } *Str; inline void _strip(LString &ret, const char *set, bool left, bool right) { if (!Str) return; char *s = Str->Str; char *e = s + Str->Len; if (!set) set = " \t\r\n"; if (left) { while (s < e && strchr(set, *s)) s++; } if (right) { while (e > s && strchr(set, e[-1])) e--; } if (e > s) ret.Set(s, e - s); } public: #ifdef LGI_UNIT_TESTS static int32 RefStrCount; #endif /// A copyable array of strings class Array : public LArray { public: Array(size_t PreAlloc = 0) : LArray(PreAlloc) {} Array(const Array &a) { *this = (Array&)a; } Array &operator =(const Array &a) { SetFixedLength(false); *((LArray*)this) = a; SetFixedLength(true); return *this; } Array &operator +=(const Array &a) { SetFixedLength(false); Add(a); SetFixedLength(true); return *this; } + + Array &operator +=(const LArray &a) + { + SetFixedLength(false); + Add(a); + SetFixedLength(true); + return *this; + } }; /// Empty constructor LString() { Str = NULL; } // This odd looking constructor allows the object to be used as the value type // in a GHashTable, where the initialiser is '0', an integer. LString(int i) { Str = NULL; } #ifndef _MSC_VER // This odd looking constructor allows the object to be used as the value type // in a GHashTable, where the initialiser is '0', an integer. LString(long int i) { Str = NULL; } #endif /// String constructor LString(const char *str, ptrdiff_t bytes) { Str = NULL; Set(str, bytes); } /// const char* constructor LString(const char *str) { Str = NULL; Set(str); } /// const char16* constructor LString(const wchar_t *str, ptrdiff_t wchars = -1) { Str = NULL; SetW(str, wchars); } #if defined(_WIN32) || defined(MAC) /// const uint32* constructor LString(const uint32_t *str, ptrdiff_t chars = -1) { Str = NULL; if (chars < 0) chars = Strlen(str); ptrdiff_t utf_len = 0; const uint32_t *end = str + chars; const uint32_t *c = str; while (c < end) { uint8_t utf[6], *u = utf; ssize_t len = sizeof(utf); if (!LgiUtf32To8(*c++, u, len)) break; utf_len += u - utf; } if (Length((uint32_t)utf_len)) { c = str; uint8_t *u = (uint8_t*)Str->Str; ssize_t len = Str->Len; while (c < end) { if (!LgiUtf32To8(*c++, u, len)) break; } *u++ = 0; } } #endif /// LString constructor LString(const LString &s) { Str = s.Str; if (Str) Str->Refs++; } ~LString() { Empty(); } /// Removes a reference to the string and deletes if needed void Empty() { if (!Str) return; Str->Refs--; if (Str->Refs < 0) { assert(!"Invalid refs"); } if (Str->Refs == 0) { free(Str); #ifdef LGI_UNIT_TESTS RefStrCount--; #endif } Str = NULL; } /// Returns the pointer to the string data char *Get() const { return Str ? Str->Str : NULL; } /// Sets the string to a new value bool Set ( /// Can be a pointer to string data or NULL to create an empty buffer (requires valid length) const char *str, /// Byte length of input string or -1 to copy till the NULL terminator. ptrdiff_t bytes = -1 ) { Empty(); if (bytes < 0) { if (str) bytes = strlen(str); else return false; } Str = (RefStr*)malloc(sizeof(RefStr) + bytes); if (!Str) return false; Str->Refs = 1; Str->Len = (uint32_t)bytes; #ifdef LGI_UNIT_TESTS RefStrCount++; #endif if (str) memcpy(Str->Str, str, bytes); Str->Str[bytes] = 0; return true; } /// Sets the string to a new value bool SetW ( /// Can be a pointer to string data or NULL to create an empty buffer (requires valid length) const wchar_t *str, /// Number of 'char16' values in the input string or -1 to copy till the NULL terminator. ptrdiff_t wchars = -1 ) { size_t Sz = WideToUtf8Len(str, wchars); if (Length(Sz)) { #ifdef _MSC_VER const uint16 *i = (const uint16*) str; ssize_t InLen = wchars >= 0 ? wchars << 1 : 0x7fffffff; assert(sizeof(*i) == sizeof(*str)); uint8_t *o = (uint8_t*)Str->Str; ssize_t OutLen = Str->Len; for (uint32_t ch; ch = LgiUtf16To32(i, InLen); ) { if (!LgiUtf32To8(ch, o, OutLen)) { *o = 0; break; } } #else uint8_t *o = (uint8_t*)Str->Str; ssize_t OutLen = Str->Len; if (wchars >= 0) { const wchar_t *end = str + wchars; for (const wchar_t *ch = str; ch < end; ch++) { if (!LgiUtf32To8(*ch, o, OutLen)) { *o = 0; break; } } } else { for (const wchar_t *ch = str; *ch; ch++) { if (!LgiUtf32To8(*ch, o, OutLen)) { *o = 0; break; } } } #endif *o = 0; } return true; } /// Equality operator (case sensitive) bool operator ==(const LString &s) { const char *a = Get(); const char *b = s.Get(); if (!a && !b) return true; if (!a || !b) return false; return !strcmp(a, b); } bool operator !=(const LString &s) { return !(*this == s); } // Equality function (default: case insensitive, as the operator== is case sensitive) bool Equals(const char *b, bool CaseInsensitive = true) const { const char *a = Get(); if (!a && !b) return true; if (!a || !b) return false; return !(CaseInsensitive ? _stricmp(a, b) : strcmp(a, b)); } // Equality function (default: case insensitive, as the operator== is case sensitive) bool Equals(const LString &s, bool CaseInsensitive = true) const { const char *a = Get(); const char *b = s.Get(); if (!a && !b) return true; if (!a || !b) return false; return !(CaseInsensitive ? _stricmp(a, b) : strcmp(a, b)); } /// Assignment operator to copy one string to another LString &operator =(const LString &s) { if (this != &s) { Empty(); Str = s.Str; if (Str) Str->Refs++; } return *this; } /// Equality with a C string (case sensitive) bool operator ==(const char *b) { const char *a = Get(); if (!a && !b) return true; if (!a || !b) return false; return !strcmp(a, b); } bool operator !=(const char *b) { return !(*this == b); } /// Assignment operators LString &operator =(const char *s) { if (Str == NULL || s < Str->Str || s > Str->Str + Str->Len) { Empty(); Set(s); } else if (s != Str->Str) { // Special case for setting it to part of itself // If you try and set a string to the start, it's a NOP ptrdiff_t Off = s - Str->Str; memmove(Str->Str, s, Str->Len - Off + 1); Str->Len -= (uint32_t)Off; } return *this; } LString &operator =(const wchar_t *s) { SetW(s); return *this; } LString &operator =(int val) { char n[32]; sprintf_s(n, sizeof(n), "%i", val); Set(n); return *this; } LString &operator =(int64 val) { char n[32]; sprintf_s(n, sizeof(n), "%" PRId64, (int64_t)val); Set(n); return *this; } /// Cast to C string operator operator char *() const { return Str && Str->Len > 0 ? Str->Str : NULL; } int operator -(const LString &s) const { return Stricmp(Get(), s.Get()); } /// Concatenation operator LString operator +(const LString &s) { LString Ret; size_t Len = Length() + s.Length(); if (Ret.Set(NULL, Len)) { char *p = Ret.Get(); if (p) { if (Str) { memcpy(p, Str->Str, Str->Len); p += Str->Len; } if (s.Str) { memcpy(p, s.Str->Str, s.Str->Len); p += s.Str->Len; } *p++ = 0; } } return Ret; } /// Concatenation / assignment operator LString &operator +=(const LString &s) { ssize_t Len = Length() + s.Length(); ssize_t Alloc = sizeof(RefStr) + Len; RefStr *rs = (RefStr*)malloc(Alloc); if (rs) { rs->Refs = 1; rs->Len = Len; #ifdef LGI_UNIT_TESTS RefStrCount++; #endif char *p = rs->Str; if (Str) { memcpy(p, Str->Str, Str->Len); p += Str->Len; } if (s.Str) { memcpy(p, s.Str->Str, s.Str->Len); p += s.Str->Len; } *p++ = 0; assert(p - (char*)rs <= Alloc); Empty(); Str = rs; } return *this; } LString operator *(ssize_t mul) { LString s; if (Str) { s.Length(Str->Len * mul); char *out = s.Get(); for (ssize_t i=0; iLen) memcpy(out, Str->Str, Str->Len); *out = 0; } return s; } /// Gets the length in bytes size_t Length() const { return Str ? Str->Len : 0; } size_t Length(ssize_t NewLen) { if (NewLen < 0) { LAssert(!"No negative string len."); Empty(); } else if (Str) { if (NewLen <= (ssize_t)Str->Len) { Str->Len = NewLen; Str->Str[NewLen] = 0; } else { RefStr *n = (RefStr*)malloc(sizeof(RefStr) + NewLen); if (n) { n->Len = NewLen; n->Refs = 1; memcpy(n->Str, Str->Str, Str->Len); n->Str[Str->Len] = 0; // NULL terminate... Empty(); // Deref the old string... Str = n; } else return 0; } } else { Str = (RefStr*)malloc(sizeof(RefStr) + NewLen); if (Str) { Str->Len = NewLen; Str->Refs = 1; Str->Str[0] = 0; // NULL terminate... } else return 0; } return Str ? Str->Len : 0; } /// Splits the string into parts using a separator Array Split(const char *Sep, int Count = -1, bool CaseSen = false) { Array a; if (Str && Sep) { const char *s = Str->Str, *Prev = s; const char *end = s + Str->Len; size_t SepLen = strlen(Sep); if (s[Str->Len] == 0) { while ((s = CaseSen ? strstr(s, Sep) : Stristr(s, Sep))) { if (s > Prev) a.New().Set(Prev, s - Prev); s += SepLen; Prev = s; if (Count > 0 && a.Length() >= (uint32_t)Count) break; } if (Prev < end) a.New().Set(Prev, end - Prev); a.SetFixedLength(); } else assert(!"String not NULL terminated."); } return a; } /// Splits the string into parts using a separator Array RSplit(const char *Sep, int Count = -1, bool CaseSen = false) { Array a; if (Str && Sep) { const char *s = Get(); size_t SepLen = strlen(Sep); LArray seps; while ((s = CaseSen ? strstr(s, Sep) : Stristr(s, Sep))) { seps.Add(s); s += SepLen; } ssize_t i, Last = seps.Length() - 1; LString p; for (i=Last; i>=0; i--) { const char *part = seps[i] + SepLen; if (i == Last) p.Set(part); else p.Set(part, seps[i+1]-part); a.AddAt(0, p); if (Count > 0 && a.Length() >= (uint32_t)Count) break; } const char *End = seps[i > 0 ? i : 0]; p.Set(Get(), End - Get()); a.AddAt(0, p); } a.SetFixedLength(); return a; } /// Splits the string into parts using delimiter chars Array SplitDelimit(const char *Delimiters = NULL, int Count = -1, bool GroupDelimiters = true) const { Array a; if (Str) { const char *delim = Delimiters ? Delimiters : " \t\r\n"; const char *s = Get(), *end = s + Length(); while (s < end) { // Skip over non-delimiters const char *e = s; while (e < end && !strchr(delim, *e)) e++; if (e > s || !GroupDelimiters) a.New().Set(s, e - s); s = e; if (*s) s++; if (GroupDelimiters) { // Skip any delimiters while (s < end && strchr(delim, *s)) s++; } // Create the string if (Count > 0 && a.Length() >= (uint32_t)Count) break; } if ( s < end || ( !GroupDelimiters && s > Get() && strchr(delim, s[-1]) ) ) a.New().Set(s); } a.SetFixedLength(); return a; } /// Joins an array of strings using a separator LString Join(const LArray &a) { LString ret; if (a.Length() == 0) return ret; char *Sep = Get(); size_t SepLen = Sep ? strlen(Sep) : 0; size_t Bytes = SepLen * (a.Length() - 1); LArray ALen; for (unsigned i=0; iStr; LArray Matches; while ((Match = (CaseSen ? strstr(Match, Old) : Stristr(Match, Old)))) { Matches.Add(Match); if (Count >= 0 && (int)Matches.Length() >= Count) break; Match += OldLen; } size_t NewSize = Str->Len + (Matches.Length() * (NewLen - OldLen)); s.Length((uint32_t)NewSize); char *Out = s.Get(); char *In = Str->Str; // For each match... for (unsigned i=0; iStr + Str->Len; if (In < End) { ptrdiff_t Bytes = End - In; memcpy(Out, In, Bytes); Out += Bytes; } assert(Out - s.Get() == NewSize); // Check we got the size right... *Out = 0; // Null terminate } else { s = *this; } return s; } /// Convert string to double double Float() { return Str ? atof(Str->Str) : NAN; } /// Convert to integer int64 Int(int Base = 10) { if (!Str) return -1; if ( Str->Len > 2 && Str->Str[0] == '0' && ( Str->Str[1] == 'x' || Str->Str[1] == 'X' ) ) { return Atoi(Str->Str+2, 16); } return Atoi(Str->Str, Base); } /// Checks if the string is a number bool IsNumeric() { if (!Str) return false; for (char *s = Str->Str; *s; s++) { if (!IsDigit(*s) && !strchr("e-+.", *s)) return false; } return true; } /// Check for non whitespace bool IsEmpty() { if (!Str) return true; for (char *s = Str->Str; *s; s++) { if (*s != ' ' && *s != '\t' && *s != '\r' && *s != '\n') return false; } return true; } /// Reverses all the characters in the string LString Reverse() { LString s; if (Length() > 0) { s = Str->Str; for (auto *a = s.Get(), *b = s.Get() + s.Length() - 1; a < b; a++, b--) { char t = *a; *a = *b; *b = t; } } return s; } /// Find a sub-string ssize_t Find(const char *needle, ssize_t start = 0, ssize_t end = -1) { if (!needle) return -1; char *c = Get(); if (!c) return -1; char *pos = c + start; while (c < pos) { if (!*c) return -1; c++; } char *found = (end > 0) ? Strnstr(c, needle, end - start) : strstr(c, needle); return (found) ? found - Get() : -1; } /// Reverse find a string (starting from the end) ssize_t RFind(const char *needle, int start = 0, ssize_t end = -1) { if (!needle) return -1; char *c = Get(); if (!c) return -1; char *pos = c + start; while (c < pos) { if (!*c) return -1; c++; } char *found, *prev = NULL; size_t str_len = strlen(needle); while (( found = ( (end > 0) ? Strnstr(c, needle, end - start) : strstr(c, needle) ) )) { prev = found; c = found + str_len; } return (prev) ? prev - Get() : -1; } /// Returns a copy of the string with all the characters converted to lower case LString Lower() { LString s; if (Str && s.Set(Str->Str, Str->Len)) Strlwr(s.Get()); return s; } /// Returns a copy of the string with all the characters converted to upper case LString Upper() { LString s; if (Str && s.Set(Str->Str, Str->Len)) Strupr(s.Get()); return s; } void Swap(LString &s) { LSwap(Str, s.Str); } /// Gets the character at 'index' int operator() (ssize_t index) const { if (!Str) return 0; char *c = Str->Str; if (index < 0) { size_t idx = Str->Len + index; return c[idx]; } else if (index < (int)Str->Len) { return c[index]; } return 0; } /// Gets the string between at 'start' and 'end' (not including the end'th character) LString operator() (ptrdiff_t start, ptrdiff_t end) { LString s; if (Str) { ptrdiff_t start_idx = start < 0 ? Str->Len + start + 1 : start; if (start_idx >= 0 && (uint32_t)start_idx < Str->Len) { ptrdiff_t end_idx = end < 0 ? Str->Len + end + 1 : end; if (end_idx >= start_idx && (uint32_t)end_idx <= Str->Len) s.Set(Str->Str + start_idx, end_idx - start_idx); } } return s; } /// Strip off any leading and trailing characters from 'set' (or whitespace if NULL) LString Strip(const char *set = NULL) { LString ret; _strip(ret, set, true, true); return ret; } /// Strip off any leading characters from 'set' (or whitespace if NULL) LString LStrip(const char *set = NULL) { LString ret; _strip(ret, set, true, false); return ret; } /// Strip off any trailing characters from 'set' (or whitespace if NULL) LString RStrip(const char *set = NULL) { LString ret; _strip(ret, set, false, true); return ret; } /// Prints a formatted string to this object int Printf(const char *Fmt, ...) { Empty(); va_list Arg; va_start(Arg, Fmt); int Bytes = Printf(Arg, Fmt); va_end(Arg); return Bytes; } /// Prints a varargs string int Printf(va_list &Arg, const char *Fmt) { Empty(); return LPrintf(*this, Fmt, Arg); } static LString Escape(const char *In, ssize_t Len = -1, const char *Chars = "\r\n\b\\\'\"") { LString s; if (In && Chars) { char Buf[256]; int Ch = 0; if (Len < 0) Len = strlen(In); while (Len-- > 0) { if (Ch > sizeof(Buf)-4) { // Buffer full, add substring to 's' Buf[Ch] = 0; s += Buf; Ch = 0; } if (strchr(Chars, *In)) { Buf[Ch++] = '\\'; switch (*In) { #undef EscChar #define EscChar(from, to) \ case from: Buf[Ch++] = to; break EscChar('\n', 'n'); EscChar('\r', 'r'); EscChar('\\', '\\'); EscChar('\b', 'b'); EscChar('\a', 'a'); EscChar('\t', 't'); EscChar('\v', 'v'); EscChar('\'', '\''); EscChar('\"', '\"'); EscChar('&', '&'); EscChar('?', '?'); #undef EscChar default: Ch += sprintf_s(Buf+Ch, sizeof(Buf)-Ch, "x%02x", *In); break; } } else Buf[Ch++] = *In; In++; } if (Ch > 0) { Buf[Ch] = 0; s += Buf; } } return s; } template static LString UnEscape(const T *In, ssize_t Len = -1) { if (!In) return LString(); LString s; if (Len < 0) // As memory allocation/copying around data is far slower then // just scanning the string for size... don't try and chunk the // processing. Len = Strlen(In); if (!s.Length(Len)) return LString(); auto *Out = s.Get(); auto *End = In + Len; while (In < End) { if (*In == '\\') { In++; switch (*In) { case 'n': case 'N': *Out++ = '\n'; break; case 'r': case 'R': *Out++ = '\r'; break; case 'b': case 'B': *Out++ = '\b'; break; case 't': case 'T': *Out++ = '\t'; break; default: *Out++ = *In; break; case 0: break; } if (*In) In++; else break; } else *Out++ = *In++; } // Trim excess size off string s.Length(Out - s.Get()); return s; } static LString UnEscape(LString s) { return UnEscape(s.Get(), s.Length()); } #if defined(__GTK_H__) #elif defined(MAC) // && __COREFOUNDATION_CFBASE__ LString(const CFStringRef r) { Str = NULL; *this = r; } LString &operator =(CFStringRef r) { if (r) { CFIndex length = CFStringGetLength(r); CFRange range = CFRangeMake(0, length); CFIndex usedBufLen = 0; CFIndex slen = CFStringGetBytes(r, range, kCFStringEncodingUTF8, '?', false, NULL, 0, &usedBufLen); if (Set(NULL, usedBufLen)) { slen = CFStringGetBytes( r, range, kCFStringEncodingUTF8, '?', false, (UInt8*)Str->Str, Str->Len, &usedBufLen); Str->Str[usedBufLen] = 0; // NULL terminate } } return *this; } CFStringRef CreateStringRef() { char *s = Get(); if (!s) return NULL; return CFStringCreateWithCString(kCFAllocatorDefault, s, kCFStringEncodingUTF8); } #ifdef __OBJC__ NSString *NsStr() { if (Str) return [[NSString alloc] initWithBytes:Str->Str length:Str->Len encoding:NSUTF8StringEncoding]; return nil; } bool operator=(NSString *const s) { *this = [s UTF8String]; return !IsEmpty(); } LString(NSString *const s) { Str = NULL; *this = [s UTF8String]; } #endif #endif };