Evo C++ Library v0.5.1
str.h
Go to the documentation of this file.
1 // Evo C++ Library
2 /* Copyright 2019 Justin Crowell
3 Distributed under the BSD 2-Clause License -- see included file LICENSE.txt for details.
4 */
6 
7 #pragma once
8 #ifndef INCL_evo_impl_str_h
9 #define INCL_evo_impl_str_h
10 
11 #include "sys.h"
12 #include "../meta.h"
13 
14 // Disable certain MSVC warnings for this file
15 #if defined(_MSC_VER)
16  #pragma warning(push)
17  #pragma warning(disable:4146)
18 #endif
19 
20 namespace evo {
23 
25 
namespace impl {
    /** Mask applied to a CHARMAP_TYPE() or CHARMAP_BREAK_TYPE() entry to extract its type value (low 3 bits). */
    static const char CHARMAP_TYPE_MASK = 0x07;

    /** Per-character type table indexed by 7-bit ASCII code (128 entries).
     - Combine an entry with CHARMAP_TYPE_MASK to get the numeric type value
     .
     \return  Table string, indexed by character code 0-127
    */
    inline const char* CHARMAP_TYPE() {
        return "@@@@@@@@@AAAAA@@@@@@@@@@@@@@@@@@ABBBBBBBBBBBBBBBCCCCCCCCCCBBBBBBBDDDDDDDDDDDDDDDDDDDDDDDDDDBBBBBBEEEEEEEEEEEEEEEEEEEEEEEEEEBBBB@";
    }

    /** Per-character break-type table indexed by 7-bit ASCII code (128 entries).
     - Combine an entry with CHARMAP_TYPE_MASK to get the numeric break type value
     .
     \return  Table string, indexed by character code 0-127
    */
    inline const char* CHARMAP_BREAK_TYPE() {
        return "@@@@@@@@@AAAAA@@@@@@@@@@@@@@@@@@ADB@@D@BCD@@DDDDEEEEEEEEEE@DC@DD@EEEEEEEEEEEEEEEEEEEEEEEEEEC@D@EBEEEEEEEEEEEEEEEEEEEEEEEEEEC@D@@";
    }

    // NOTE(review): not referenced in this part of the file -- presumably 10 digits + 26 letters, confirm against users
    static const int CHARMAP_ALPHA_LEN = 36;

    /** ASCII code of first uppercase letter ('A'). */
    static const int CHARMAP_UPPER_FIRST = 65;
    /** ASCII code of last uppercase letter ('Z'). */
    static const int CHARMAP_UPPER_LAST = 90;

    /** Uppercase letters, indexed by offset from the first letter of either case. */
    inline const char* CHARMAP_UPPER() {
        return "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    }

    /** ASCII code of first lowercase letter ('a'). */
    static const int CHARMAP_LOWER_FIRST = 97;
    /** ASCII code of last lowercase letter ('z'). */
    static const int CHARMAP_LOWER_LAST = 122;

    /** Lowercase letters, indexed by offset from the first letter of either case. */
    inline const char* CHARMAP_LOWER() {
        return "abcdefghijklmnopqrstuvwxyz";
    }
}
49 enum CharType {
50  ctOTHER = 0,
56 };
57 
60  cbtOTHER = 0,
66 };
67 
76 inline CharType ascii_type(char ch) {
77  if (ch & 0x80)
78  return ctOTHER;
79  return (CharType)(impl::CHARMAP_TYPE()[(int)ch] & impl::CHARMAP_TYPE_MASK);
80 }
81 
91 inline CharBreakType ascii_breaktype(char ch) {
92  if (ch & 0x80)
93  return cbtOTHER;
94  return (CharBreakType)(impl::CHARMAP_BREAK_TYPE()[(int)ch] & impl::CHARMAP_TYPE_MASK);
95 }
96 
105 inline char ascii_toupper(char ch) {
106  if (ch < impl::CHARMAP_LOWER_FIRST || ch > impl::CHARMAP_LOWER_LAST)
107  return ch;
108  return impl::CHARMAP_UPPER()[ch - impl::CHARMAP_LOWER_FIRST];
109 }
110 
119 inline char ascii_tolower(char ch) {
120  if (ch < impl::CHARMAP_UPPER_FIRST || ch > impl::CHARMAP_UPPER_LAST)
121  return ch;
122  return impl::CHARMAP_LOWER()[ch - impl::CHARMAP_UPPER_FIRST];
123 }
124 
126 
/** Unicode replacement character (U+FFFD), a single UTF-16 code unit. */
static const wchar16 UNICODE_REPLACEMENT_CHAR = 0xFFFD;
134 
136 enum UtfMode {
141 };
142 
144 
160 inline const char* utf8_scan(wchar32& code, const char* str, const char* end, UtfMode mode=umREPLACE_INVALID) {
161  assert( str != NULL );
162  assert( str <= end );
163  const uchar LBITS_1 = 0x80;
164  const uchar LBITS_11 = 0xC0;
165  const uchar LBITS_111 = 0xE0;
166  const uchar LBITS_1111 = 0xF0;
167  const uchar LBITS_11111 = 0xF8;
168  const uchar RBITS_111111 = 0x3F;
169  const uchar RBITS_11111 = 0x1F;
170  const uchar RBITS_1111 = 0x0F;
171  const uchar RBITS_111 = 0x07;
172  for (const uchar* p; str < end; ) {
173  p = (const uchar*)str;
174  if ((*p & LBITS_1) != 0) {
175  if ((*p & LBITS_111) == LBITS_11) {
176  // 2 byte char
177  if (str+1 < end && (p[1] & LBITS_11) == LBITS_1) {
178  code = ((uint32)(p[0] & RBITS_11111) << 6) |
179  (uint32)(p[1] & RBITS_111111);
180  return str + 2;
181  }
182  } else if ((*p & LBITS_1111) == LBITS_111) {
183  // 3 byte char
184  if (str+2 < end && (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1) {
185  code = ((uint32)(p[0] & RBITS_1111) << 12) |
186  ((uint32)(p[1] & RBITS_111111) << 6) |
187  (uint32)(p[2] & RBITS_111111);
188  return str + 3;
189  }
190  } else if ((*p & LBITS_11111) == LBITS_1111) {
191  // 4 byte char
192  if (str+3 < end && (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1 && (p[3] & LBITS_11) == LBITS_1) {
193  code = ((uint32)(p[0] & RBITS_111) << 18) |
194  ((uint32)(p[1] & RBITS_111111) << 12) |
195  ((uint32)(p[2] & RBITS_111111) << 6) |
196  (uint32)(p[3] & RBITS_111111);
197  return str + 4;
198  }
199  }
200  // Invalid multi-byte char
201  if (mode == umREPLACE_INVALID) {
203  return str + 1;
204  } else if (mode == umSKIP_INVALID) {
205  ++str;
206  continue;
207  } else if (mode == umSTRICT) {
208  code = 1;
209  return NULL;
210  }
211  }
212  // 1 byte char
213  code = (wchar32)*p;
214  return str + 1;
215  }
216  code = 0;
217  return NULL;
218 }
219 
234 inline const char* utf8_scan_term(wchar32& code, const char* str, UtfMode mode=umREPLACE_INVALID) {
235  assert( str != NULL );
236  const uchar LBITS_1 = 0x80;
237  const uchar LBITS_11 = 0xC0;
238  const uchar LBITS_111 = 0xE0;
239  const uchar LBITS_1111 = 0xF0;
240  const uchar LBITS_11111 = 0xF8;
241  const uchar RBITS_111111 = 0x3F;
242  const uchar RBITS_11111 = 0x1F;
243  const uchar RBITS_1111 = 0x0F;
244  const uchar RBITS_111 = 0x07;
245  for (const uchar* p; *str != 0; ) {
246  p = (const uchar*)str;
247  if ((*p & LBITS_1) != 0) {
248  if ((*p & LBITS_111) == LBITS_11) {
249  // 2 byte char
250  if (str[1] != 0 && (p[1] & LBITS_11) == LBITS_1) {
251  code = ((uint32)(p[0] & RBITS_11111) << 6) |
252  (uint32)(p[1] & RBITS_111111);
253  return str + 2;
254  }
255  } else if ((*p & LBITS_1111) == LBITS_111) {
256  // 3 byte char
257  if (str[1] != 0 && str[2] != 0 &&
258  (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1) {
259  code = ((uint32)(p[0] & RBITS_1111) << 12) |
260  ((uint32)(p[1] & RBITS_111111) << 6) |
261  (uint32)(p[2] & RBITS_111111);
262  return str + 3;
263  }
264  } else if ((*p & LBITS_11111) == LBITS_1111) {
265  // 4 byte char
266  if (str[1] != 0 && str[2] != 0 && str[3] != 0 &&
267  (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1 && (p[3] & LBITS_11) == LBITS_1) {
268  code = ((uint32)(p[0] & RBITS_111) << 18) |
269  ((uint32)(p[1] & RBITS_111111) << 12) |
270  ((uint32)(p[2] & RBITS_111111) << 6) |
271  (uint32)(p[3] & RBITS_111111);
272  return str + 4;
273  }
274  }
275  // Invalid multi-byte char
276  if (mode == umREPLACE_INVALID) {
278  return str + 1;
279  } else if (mode == umSKIP_INVALID) {
280  ++str;
281  continue;
282  } else if (mode == umSTRICT) {
283  code = 1;
284  return NULL;
285  }
286  }
287  // 1 byte char
288  code = (wchar32)*p;
289  return str + 1;
290  }
291  code = 0;
292  return NULL;
293 }
294 
305 inline int utf8_compare(const char* str1, ulong len1, const char* str2, ulong len2) {
306  if (str1 == NULL) {
307  if (str2 != NULL)
308  return -1;
309  } else if (str2 == NULL) {
310  return 1;
311  } else if (str1 == str2) {
312  if (len1 < len2)
313  return -1;
314  else if (len1 > len2)
315  return 1;
316  } else {
317  const char* end1 = str1 + len1;
318  const char* end2 = str2 + len2;
319  for (;;) {
320  if (str1 >= end1) {
321  if (str2 >= end2)
322  break;
323  return -1;
324  } else if (str2 >= end2)
325  return 1;
326  else if (*str1 < *str2)
327  return -1;
328  else if (*str1 > *str2)
329  return 1;
330  ++str1;
331  ++str2;
332  }
333  }
334  return 0;
335 }
336 
346 inline int utf8_compare(const char* str1, ulong len1, const char* str2) {
347  if (str1 == NULL) {
348  if (str2 != NULL)
349  return -1;
350  } else if (str2 == NULL) {
351  return 1;
352  } else {
353  const char* end1 = str1 + len1;
354  for (;;) {
355  if (str1 >= end1) {
356  if (*str2 == 0)
357  break;
358  return -1;
359  } else if (*str2 == 0)
360  return 1;
361  else if (*str1 < *str2)
362  return -1;
363  else if (*str1 > *str2)
364  return 1;
365  ++str1;
366  ++str2;
367  }
368  }
369  return 0;
370 }
371 
/** Compare two terminated UTF-8 strings.
 - Bytes are compared as unsigned values, so the result follows Unicode code point order for valid UTF-8
   and does not depend on whether plain char is signed
 - NULL sorts before any non-NULL string, and two NULL strings are equal
 .
 \param  str1  First string, null-terminated, may be NULL
 \param  str2  Second string, null-terminated, may be NULL
 \return       -1 if str1 is less, 0 if equal, 1 if str1 is greater
*/
inline int utf8_compare(const char* str1, const char* str2) {
    if (str1 == NULL) {
        if (str2 != NULL)
            return -1;
    } else if (str2 == NULL) {
        return 1;
    } else {
        const unsigned char* p1 = (const unsigned char*)str1;
        const unsigned char* p2 = (const unsigned char*)str2;
        for (;;) {
            if (*p1 == 0) {
                if (*p2 == 0)
                    break;
                return -1;
            } else if (*p2 == 0)
                return 1;
            else if (*p1 < *p2)
                return -1;
            else if (*p1 > *p2)
                return 1;
            ++p1;
            ++p2;
        }
    }
    return 0;
}
404 
418 inline const char* utf8_min(const char* str, const char* end, bool strict=false, ulong mincount=1, uint minsize=2) {
419  assert( str != NULL );
420  assert( str <= end );
421  assert( mincount > 0 );
422  assert( minsize >= 2 );
423  assert( minsize <= 4 );
424  const uchar LBITS_1 = 0x80;
425  const uchar LBITS_11 = 0xC0;
426  const uchar LBITS_111 = 0xE0;
427  const uchar LBITS_1111 = 0xF0;
428  const uchar LBITS_11111 = 0xF8;
429  ulong count = 0;
430  for (const uchar* p; str < end; ) {
431  p = (const uchar*)str;
432  if ((*p & LBITS_1) != 0) {
433  if ((*p & LBITS_111) == LBITS_11) {
434  // 2 byte char
435  if (str+1 < end && (p[1] & LBITS_11) == LBITS_1) {
436  if (minsize <= 2 && ++count >= mincount)
437  return str;
438  str += 2;
439  continue;
440  }
441  } else if ((*p & LBITS_1111) == LBITS_111) {
442  // 3 byte char
443  if (str+2 < end && (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1) {
444  if (minsize <= 3 && ++count >= mincount)
445  return str;
446  str += 3;
447  continue;
448  }
449  } else if ((*p & LBITS_11111) == LBITS_1111) {
450  // 4 byte char
451  if (str+3 < end && (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1 && (p[3] & LBITS_11) == LBITS_1) {
452  if (minsize <= 4 && ++count >= mincount)
453  return str;
454  str += 4;
455  continue;
456  }
457  }
458  // Invalid multi-byte char
459  if (strict)
460  break;
461  }
462  // 1 byte char
463  ++str;
464  }
465  return NULL;
466 }
467 
481 inline ulong utf8_count(const char* str, const char* end, UtfMode mode=umREPLACE_INVALID) {
482  assert( str != NULL );
483  assert( str <= end );
484  const uchar LBITS_1 = 0x80;
485  const uchar LBITS_11 = 0xC0;
486  const uchar LBITS_111 = 0xE0;
487  const uchar LBITS_1111 = 0xF0;
488  const uchar LBITS_11111 = 0xF8;
489  ulong count = 0;
490  for (const uchar* p; str < end; ) {
491  p = (const uchar*)str;
492  if ((*p & LBITS_1) != 0) {
493  if ((*p & LBITS_111) == LBITS_11) {
494  // 2 byte char
495  if (str+1 < end && (p[1] & LBITS_11) == LBITS_1) {
496  ++count;
497  str += 2;
498  continue;
499  }
500  } else if ((*p & LBITS_1111) == LBITS_111) {
501  // 3 byte char
502  if (str+2 < end && (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1) {
503  ++count;
504  str += 3;
505  continue;
506  }
507  } else if ((*p & LBITS_11111) == LBITS_1111) {
508  // 4 byte char
509  if (str+3 < end && (p[1] & LBITS_11) == LBITS_1 && (p[2] & LBITS_11) == LBITS_1 && (p[3] & LBITS_11) == LBITS_1) {
510  ++count;
511  str += 4;
512  continue;
513  }
514  }
515  // Invalid multi-byte char
516  if (mode == umSKIP_INVALID) {
517  ++str;
518  continue;
519  } else if (mode == umSTRICT)
520  return NONE;
521  // umINCLUDE_INVALID & umREPLACE_INVALID count as 1 char
522  }
523  // 1 byte char
524  ++count;
525  ++str;
526  }
527  return count;
528 }
529 
543 inline ulong utf8_to16(const char*& str, const char* end, wchar16* outbuf=NULL, ulong outsize=0, UtfMode mode=umREPLACE_INVALID) {
544  assert( str != NULL );
545  assert( str <= end );
546  ulong written = 0;
547  wchar32 code;
548  const char* p;
549  if (outbuf == NULL) {
550  // Count UTF-16 size required (no writes)
551  for (;;) {
552  if ((p = utf8_scan(code, str, end, mode)) == NULL) {
553  if (code == 1)
554  return END; // Invalid input with mode umSTRICT
555  break;
556  }
557  if (code < 0x10000) {
558  // Single char
559  if (code >= 0xD800 && code <= 0xDFFF) {
560  if (mode == umSKIP_INVALID) {
561  str = p;
562  continue;
563  } else if (mode == umSTRICT)
564  return END; // Invalid input using reserved surrogate value with mode umSTRICT
565  // umINCLUDE_INVALID & umREPLACE_INVALID count as 1 char
566  }
567  ++written;
568  } else
569  // Surrogate pair
570  written += 2;
571  str = p;
572  }
573  } else {
574  // Write UTF-16
575  for (;;) {
576  if ((p = utf8_scan(code, str, end, mode)) == NULL) {
577  if (code == 1)
578  return END; // Invalid input with mode umSTRICT
579  break;
580  }
581  if (code < 0x10000) {
582  // Single char
583  if (code >= 0xD800 && code <= 0xDFFF) {
584  if (mode == umREPLACE_INVALID) {
586  } else if (mode == umSKIP_INVALID) {
587  str = p;
588  continue;
589  } else if (mode == umSTRICT)
590  return END; // Invalid input using reserved surrogate value with mode umSTRICT
591  }
592  if (written >= outsize)
593  break;
594  outbuf[written] = (wchar16)code;
595  ++written;
596  } else {
597  // Surrogate pair
598  if (written + 1 >= outsize)
599  break;
600  code -= 0x10000;
601  outbuf[written] = (wchar16)((code >> 10) & 0x3FF) + 0xD800;
602  outbuf[written+1] = (wchar16)(code & 0x3FF) + 0xDC00;
603  written += 2;
604  }
605  str = p;
606  }
607  }
608  return written;
609 }
610 
612 
630 inline const wchar16* utf16_scan(wchar32& code, const wchar16* str, const wchar16* end, UtfMode mode=umREPLACE_INVALID) {
631  assert( str != NULL );
632  assert( str <= end );
633  while (str < end) {
634  uint32 ch1 = (uint16)*str;
635  if (!(ch1 < 0xD800 || ch1 > 0xDFFF)) {
636  if (ch1 <= 0xDBFF && str+1 < end) {
637  uint32 ch2 = (uint16)str[1];
638  if (!(ch2 < 0xDC00 || ch2 > 0xDFFF)) {
639  // Valid surrogate pair
640  ch1 = ((ch1 - 0xD800) & 0x3FF) << 10;
641  ch2 = (ch2 - 0xDC00) & 0x3FF;
642  code = (ch1 | ch2) + 0x10000;
643  return str + 2;
644  }
645  }
646  // Invalid surrogate pair
647  if (mode == umREPLACE_INVALID) {
649  return str + 1;
650  } else if (mode == umSKIP_INVALID) {
651  ++str;
652  continue;
653  } else if (mode == umSTRICT) {
654  code = 1;
655  return NULL;
656  }
657  }
658  // Single char
659  code = ch1;
660  return str + 1;
661  }
662  code = 0;
663  return NULL;
664 }
665 
682 inline const wchar16* utf16_scan_term(wchar32& code, const wchar16* str, UtfMode mode=umREPLACE_INVALID) {
683  assert( str != NULL );
684  for (;;) {
685  uint32 ch1 = (uint16)*str;
686  if (ch1 == 0)
687  break;
688  if (!(ch1 < 0xD800 || ch1 > 0xDFFF)) {
689  if (ch1 <= 0xDBFF && str[1] != 0) {
690  uint32 ch2 = (uint16)str[1];
691  if (!(ch2 < 0xDC00 || ch2 > 0xDFFF)) {
692  // Valid surrogate pair
693  ch1 = ((ch1 - 0xD800) & 0x3FF) << 10;
694  ch2 = (ch2 - 0xDC00) & 0x3FF;
695  code = (ch1 | ch2) + 0x10000;
696  return str + 2;
697  }
698  }
699  // Invalid surrogate pair
700  if (mode == umREPLACE_INVALID) {
702  return str + 1;
703  } else if (mode == umSKIP_INVALID) {
704  ++str;
705  continue;
706  } else if (mode == umSTRICT) {
707  code = 1;
708  return NULL;
709  }
710  }
711  // Single char
712  code = ch1;
713  return str + 1;
714  }
715  code = 0;
716  return NULL;
717 }
718 
729 inline int utf16_compare(const wchar16* str1, ulong len1, const wchar16* str2, ulong len2) {
730  if (str1 == NULL) {
731  if (str2 != NULL)
732  return -1;
733  } else if (str2 == NULL) {
734  return 1;
735  } else if (str1 == str2) {
736  if (len1 < len2)
737  return -1;
738  else if (len1 > len2)
739  return 1;
740  } else {
741  const wchar16* end1 = str1 + len1;
742  const wchar16* end2 = str2 + len2;
743  wchar32 code1, code2;
744  for (;;) {
745  str1 = utf16_scan(code1, str1, end1, umINCLUDE_INVALID);
746  str2 = utf16_scan(code2, str2, end2, umINCLUDE_INVALID);
747  if (str1 == NULL) {
748  if (str2 == NULL)
749  break;
750  return -1;
751  } else if (str2 == NULL)
752  return 1;
753  else if (code1 < code2)
754  return -1;
755  else if (code1 > code2)
756  return 1;
757  }
758  }
759  return 0;
760 }
761 
771 inline int utf16_compare(const wchar16* str1, ulong len1, const wchar16* str2) {
772  if (str1 == NULL) {
773  if (str2 != NULL)
774  return -1;
775  } else if (str2 == NULL) {
776  return 1;
777  } else {
778  const wchar16* end1 = str1 + len1;
779  wchar32 code1, code2;
780  for (;;) {
781  str1 = utf16_scan(code1, str1, end1, umINCLUDE_INVALID);
782  str2 = utf16_scan_term(code2, str2, umINCLUDE_INVALID);
783  if (str1 == NULL) {
784  if (str2 == NULL)
785  break;
786  return -1;
787  } else if (str2 == NULL)
788  return 1;
789  else if (code1 < code2)
790  return -1;
791  else if (code1 > code2)
792  return 1;
793  }
794  }
795  return 0;
796 }
797 
806 inline int utf16_compare(const wchar16* str1, const wchar16* str2) {
807  if (str1 == NULL) {
808  if (str2 != NULL)
809  return -1;
810  } else if (str2 == NULL) {
811  return 1;
812  } else {
813  wchar32 code1, code2;
814  for (;;) {
815  str1 = utf16_scan_term(code1, str1, umINCLUDE_INVALID);
816  str2 = utf16_scan_term(code2, str2, umINCLUDE_INVALID);
817  if (str1 == NULL) {
818  if (str2 == NULL)
819  break;
820  return -1;
821  } else if (str2 == NULL)
822  return 1;
823  else if (code1 < code2)
824  return -1;
825  else if (code1 > code2)
826  return 1;
827  }
828  }
829  return 0;
830 }
831 
842 inline int utf16_compare8(const wchar16* str1, ulong len1, const char* str2, ulong len2) {
843  if (str1 == NULL) {
844  if (str2 != NULL)
845  return -1;
846  } else if (str2 == NULL) {
847  return 1;
848  } else {
849  const wchar16* end1 = str1 + len1;
850  const char* end2 = str2 + len2;
851  wchar32 code1, code2;
852  for (;;) {
853  str1 = utf16_scan(code1, str1, end1, umINCLUDE_INVALID);
854  str2 = utf8_scan(code2, str2, end2, umINCLUDE_INVALID);
855  if (str1 == NULL) {
856  if (str2 == NULL)
857  break;
858  return -1;
859  } else if (str2 == NULL)
860  return 1;
861  else if (code1 < code2)
862  return -1;
863  else if (code1 > code2)
864  return 1;
865  }
866  }
867  return 0;
868 }
869 
879 inline int utf16_compare8(const wchar16* str1, ulong len1, const char* str2) {
880  if (str1 == NULL) {
881  if (str2 != NULL)
882  return -1;
883  } else if (str2 == NULL) {
884  return 1;
885  } else {
886  const wchar16* end1 = str1 + len1;
887  wchar32 code1, code2;
888  for (;;) {
889  str1 = utf16_scan(code1, str1, end1, umINCLUDE_INVALID);
890  str2 = utf8_scan_term(code2, str2, umINCLUDE_INVALID);
891  if (str1 == NULL) {
892  if (str2 == NULL)
893  break;
894  return -1;
895  } else if (str2 == NULL)
896  return 1;
897  else if (code1 < code2)
898  return -1;
899  else if (code1 > code2)
900  return 1;
901  }
902  }
903  return 0;
904 }
905 
914 inline int utf16_compare8(const wchar16* str1, const char* str2) {
915  if (str1 == NULL) {
916  if (str2 != NULL)
917  return -1;
918  } else if (str2 == NULL) {
919  return 1;
920  } else {
921  wchar32 code1, code2;
922  for (;;) {
923  str1 = utf16_scan_term(code1, str1, umINCLUDE_INVALID);
924  str2 = utf8_scan_term(code2, str2, umINCLUDE_INVALID);
925  if (str1 == NULL) {
926  if (str2 == NULL)
927  break;
928  return -1;
929  } else if (str2 == NULL)
930  return 1;
931  else if (code1 < code2)
932  return -1;
933  else if (code1 > code2)
934  return 1;
935  }
936  }
937  return 0;
938 }
939 
946 inline ulong utf16_strlen(const wchar16* str) {
947  if (str == NULL)
948  return 0;
949  const wchar16* p = str;
950  while (*p != 0)
951  ++p;
952  return (ulong)(p - str);
953 }
954 
966 inline const wchar16* utf16_min(const wchar16* str, const wchar16* end, bool strict=false, uint mincount=1) {
967  assert( str != NULL );
968  assert( str <= end );
969  assert( mincount > 0 );
970  uint count = 0;
971  while (str < end) {
972  uint32 ch1 = (uint16)*str;
973  if (!(ch1 < 0xD800 || ch1 > 0xDFFF)) {
974  if (ch1 <= 0xDBFF && str+1 < end) {
975  uint32 ch2 = (uint16)str[1];
976  if (!(ch2 < 0xDC00 || ch2 > 0xDFFF)) {
977  // Valid surrogate pair
978  if (++count >= mincount)
979  return str;
980  str += 2;
981  continue;
982  }
983  }
984  // Invalid surrogate pair
985  if (strict)
986  break;
987  }
988  // Single char
989  ++str;
990  }
991  return NULL;
992 }
993 
1009 inline ulong utf16_count(const wchar16* str, const wchar16* end, UtfMode mode=umREPLACE_INVALID) {
1010  assert( str != NULL );
1011  assert( str <= end );
1012  ulong count = 0;
1013  while (str < end) {
1014  uint32 ch1 = (uint16)*str;
1015  if (!(ch1 < 0xD800 || ch1 > 0xDFFF)) {
1016  if (ch1 <= 0xDBFF && str+1 < end) {
1017  uint32 ch2 = (uint16)str[1];
1018  if (!(ch2 < 0xDC00 || ch2 > 0xDFFF)) {
1019  // Valid surrogate pair
1020  ++count;
1021  str += 2;
1022  continue;
1023  }
1024  }
1025  // Invalid surrogate pair
1026  if (mode == umSKIP_INVALID) {
1027  ++str;
1028  continue;
1029  } else if (mode == umSTRICT)
1030  return NONE;
1031  }
1032  // Single char
1033  ++count;
1034  ++str;
1035  }
1036  return count;
1037 }
1038 
1053 inline ulong utf16_to8(const wchar16*& str, const wchar16* end, char* outbuf=NULL, ulong outsize=0, UtfMode mode=umREPLACE_INVALID) {
1054  assert( str != NULL );
1055  assert( str <= end );
1056  ulong written = 0;
1057  wchar32 code;
1058  const wchar16* p;
1059  if (outbuf == NULL) {
1060  // Count UTF-8 size required (no writes)
1061  for (;;) {
1062  if ((p = utf16_scan(code, str, end, mode)) == NULL) {
1063  if (code == 1)
1064  return END; // Invalid input with mode umSTRICT
1065  break;
1066  }
1067  str = p;
1068  if (code < 0x0080) {
1069  // 1 byte char
1070  ++written;
1071  } else if (code < 0x0800) {
1072  // 2 byte char
1073  written += 2;
1074  } else if (code < 0x10000) {
1075  // 3 byte char
1076  written += 3;
1077  } else {
1078  // 4 byte char
1079  assert( code <= 0x10FFFF );
1080  written += 4;
1081  }
1082  }
1083  } else {
1084  // Write UTF-16
1085  const uchar LBITS_1 = 0x80;
1086  const uchar LBITS_11 = 0xC0;
1087  const uchar LBITS_111 = 0xE0;
1088  const uchar LBITS_1111 = 0xF0;
1089  const uchar RBITS_111111 = 0x3F;
1090  const uchar RBITS_11111 = 0x1F;
1091  const uchar RBITS_1111 = 0x0F;
1092  const uchar RBITS_111 = 0x07;
1093  uchar* out = (uchar*)outbuf;
1094  for (;;) {
1095  if ((p = utf16_scan(code, str, end, mode)) == NULL) {
1096  if (code == 1)
1097  return END; // Invalid input with mode umSTRICT
1098  break;
1099  }
1100  if (code < 0x0080) {
1101  // 1 byte char
1102  if (written >= outsize)
1103  break;
1104  out[written] = (uchar)code;
1105  ++written;
1106  } else if (code < 0x0800) {
1107  // 2 byte char
1108  if (written + 1 >= outsize)
1109  break;
1110  outbuf[written] = ((uchar)(code >> 6) & RBITS_11111) | LBITS_11;
1111  outbuf[written+1] = ((uchar)code & RBITS_111111) | LBITS_1;
1112  written += 2;
1113  } else if (code < 0x10000) {
1114  // 3 byte char
1115  if (written + 2 >= outsize)
1116  break;
1117  outbuf[written] = ((uchar)(code >> 12) & RBITS_1111) | LBITS_111;
1118  outbuf[written+1] = ((uchar)(code >> 6) & RBITS_111111) | LBITS_1;
1119  outbuf[written+2] = ((uchar)code & RBITS_111111) | LBITS_1;
1120  written += 3;
1121  } else {
1122  // 4 byte char
1123  assert( code <= 0x10FFFF );
1124  if (written + 3 >= outsize)
1125  break;
1126  outbuf[written] = ((uchar)(code >> 18) & RBITS_111) | LBITS_1111;
1127  outbuf[written+1] = ((uchar)(code >> 12) & RBITS_111111) | LBITS_1;
1128  outbuf[written+2] = ((uchar)(code >> 6) & RBITS_111111) | LBITS_1;
1129  outbuf[written+3] = ((uchar)code & RBITS_111111) | LBITS_1;
1130  written += 4;
1131  }
1132  str = p;
1133  }
1134  }
1135  return written;
1136 }
1137 
1139 
/** Find the last occurrence of a character in a string buffer (reverse search).
 - Uses memrchr() when EVO_GLIBC_MEMRCHR is defined and EVO_NO_MEMRCHR is not, otherwise a plain backwards scan
 .
 \param  str   Buffer to search
 \param  ch    Character to find
 \param  size  Buffer size in bytes
 \return       Pointer to the last occurrence of ch, NULL if not found
*/
inline const char* string_memrchr(const char* str, char ch, size_t size) {
#if !defined(EVO_NO_MEMRCHR) && defined(EVO_GLIBC_MEMRCHR)
    return (char*)::memrchr(str, ch, size);
#else
    // Walk indexes from the back so the first hit is the last occurrence
    while (size > 0) {
        --size;
        if (str[size] == ch)
            return str + size;
    }
    return NULL;
#endif
}
1160 
1162 
1168 };
1169 
1170 // Implementation
1172 namespace impl {
1173  // String search alg helpers
1174 
1175  // Knuth-Morris-Pratt based search -- requires table[pattern_size]
1176  template<class T>
1177  inline ulong string_search_impl_kmp(T* table, const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset=0) {
1178  assert( pattern_size > 1 );
1179  assert( data_size > pattern_size );
1180 
1181  // Build partial match table
1182  table[0] = 0;
1183  for (uint len = 0, i = 1; i < pattern_size; ) {
1184  assert( len < pattern_size );
1185  if (pattern[i] == pattern[len]) {
1186  table[i] = (T)++len;
1187  ++i;
1188  } else if (len != 0) {
1189  len = table[len - 1];
1190  } else {
1191  table[i] = 0;
1192  ++i;
1193  }
1194  }
1195 
1196  // Do search
1197  for (ulong i = 0, j = 0; i < data_size; ) {
1198  if (data[i] == pattern[j]) {
1199  ++i;
1200  if (++j == pattern_size)
1201  return offset + (i - j);
1202  } else if (j == 0) {
1203  ++i;
1204  } else
1205  j = table[j - 1];
1206  }
1207  return NONE;
1208  }
1209 
1210  // Knuth-Morris-Pratt based reverse search -- requires table[pattern_size]
1211  template<class T>
1212  inline ulong string_search_impl_kmp_reverse(T* table, const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset=0) {
1213  // Mostly the same as the forward version, arrays are just indexed in reverse (so 0 is last char, 1 is 2nd last, etc)
1214  assert( pattern_size > 1 );
1215  assert( data_size > pattern_size );
1216  const uint pend = pattern_size - 1;
1217 
1218  // Build partial match table
1219  table[0] = 0;
1220  for (uint len = 0, i = 1; i < pattern_size; ) {
1221  assert( len < pattern_size );
1222  if (pattern[pend - i] == pattern[pend - len]) {
1223  table[i] = (T)++len;
1224  ++i;
1225  } else if (len != 0) {
1226  len = table[len - 1];
1227  } else {
1228  table[i] = 0;
1229  ++i;
1230  }
1231  }
1232 
1233  // Do search
1234  const uint dend = data_size - 1;
1235  for (ulong i = 0, j = 0; i < data_size; ) {
1236  if (data[dend - i] == pattern[pend - j]) {
1237  ++i;
1238  if (++j == pattern_size)
1239  return offset + (data_size - (i - j) - pattern_size);
1240  } else if (j == 0) {
1241  ++i;
1242  } else
1243  j = table[j - 1];
1244  }
1245  return NONE;
1246  }
1247 
1248  // Forward search
1249 
1250  // Find pattern in string, offset is added to result if found
1251  inline ulong string_search(const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset) {
1252  if (pattern_size > 0 && pattern_size <= data_size) {
1253  if (pattern_size == 1) {
1254  // Special case for single char
1255  const char* ptr = (char*)memchr(data, *pattern, data_size);
1256  if (ptr != NULL)
1257  return offset + (ulong)(ptr - data);
1258  } else if (pattern_size == data_size) {
1259  // Special case for pattern and key being same size
1260  if (memcmp(data, pattern, data_size) == 0)
1261  return offset;
1262  } else {
1263  #if !defined(EVO_NO_MEMMEM) && defined(EVO_GLIBC_MEMMEM)
1264  // memmem()
1265  const char* ptr = (char*)memmem(data, data_size, pattern, pattern_size);
1266  if (ptr != NULL)
1267  return offset + (ulong)(ptr - data);
1268  #else
1269  // KMP
1270  const uint8 STACK_BUF_SIZE = 128;
1271  if (pattern_size <= STACK_BUF_SIZE) {
1272  // Smaller patterns use stack table
1273  uint8 table[STACK_BUF_SIZE];
1274  return string_search_impl_kmp<uint8>(table, pattern, pattern_size, data, data_size, offset);
1275  } else {
1276  // Larger patterns must allocate a table
1277  uint* table = (uint*)::malloc(pattern_size * sizeof(uint));
1278  const ulong result = string_search_impl_kmp<uint>(table, pattern, pattern_size, data, data_size, offset);
1279  ::free(table);
1280  return result;
1281  }
1282  #endif
1283  }
1284  }
1285  return NONE;
1286  }
1287 
1288  // string_search() variant
1289  inline ulong string_search_kmp(const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset) {
1290  if (pattern_size > 0 && pattern_size <= data_size) {
1291  if (pattern_size == 1) {
1292  // Special case for single char
1293  const char* ptr = (char*)memchr(data, *pattern, data_size);
1294  if (ptr != NULL)
1295  return offset + (ulong)(ptr - data);
1296  } else if (pattern_size == data_size) {
1297  // Special case for pattern and key being same size
1298  if (memcmp(data, pattern, data_size) == 0)
1299  return offset;
1300  } else {
1301  const uint8 STACK_BUF_SIZE = 128;
1302  if (pattern_size <= STACK_BUF_SIZE) {
1303  // Smaller patterns use stack table
1304  uint8 table[STACK_BUF_SIZE];
1305  return string_search_impl_kmp<uint8>(table, pattern, pattern_size, data, data_size, offset);
1306  } else {
1307  // Larger patterns must allocate a table
1308  uint* table = (uint*)::malloc(pattern_size * sizeof(uint));
1309  const ulong result = string_search_impl_kmp<uint>(table, pattern, pattern_size, data, data_size, offset);
1310  ::free(table);
1311  return result;
1312  }
1313  }
1314  }
1315  return NONE;
1316  }
1317 
1318  // string_search() variant
1319  inline ulong string_search_basic(const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset) {
1320  if (pattern_size > 0 && pattern_size <= data_size) {
1321  if (pattern_size == 1) {
1322  // Special case for single char
1323  const char* ptr = (char*)memchr(data, *pattern, data_size);
1324  if (ptr != NULL)
1325  return offset + (ulong)(ptr - data);
1326  } else if (pattern_size == data_size) {
1327  // Special case for pattern and key being same size
1328  if (memcmp(data, pattern, data_size) == 0)
1329  return offset;
1330  } else {
1331  data_size = data_size + 1 - pattern_size; // no space for pattern after here
1332  const uchar pattern_first = (uchar)*pattern;
1333  const char* start = data;
1334  const char* end = data + data_size;
1335  while (data < end) {
1336  if ((data = (char*)memchr(data, pattern_first, data_size)) == NULL)
1337  break;
1338  if (memcmp(data, pattern, pattern_size) == 0)
1339  return offset + (ulong)(data - start);
1340  ++data;
1341  data_size = (ulong)(end - data);
1342  }
1343  }
1344  }
1345  return NONE;
1346  }
1347 
1348  // Select a string_search() variant dynamically
1349  inline ulong string_search(StringSearchAlg alg, const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset) {
1350  switch (alg) {
1351  case ssaKMP:
1352  return string_search_kmp(pattern, pattern_size, data, data_size, offset);
1353  case ssaBASIC:
1354  return string_search_basic(pattern, pattern_size, data, data_size, offset);
1355  default:
1356  return string_search(pattern, pattern_size, data, data_size, offset);
1357  }
1358  }
1359 
1360  // Reverse search
1361 
1362  // Find pattern in string with reverse search, offset is added to result if found -- default uses KMP
1363  inline ulong string_search_reverse(const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset) {
1364  if (pattern_size > 0 && pattern_size <= data_size) {
1365  if (pattern_size == 1) {
1366  // Special case for single char
1367  #if !defined(EVO_NO_MEMRCHR) && defined(EVO_GLIBC_MEMRCHR)
1368  const char* ptr = (char*)memrchr(data, *pattern, data_size);
1369  if (ptr != NULL)
1370  return offset + (ulong)(ptr - data);
1371  #else
1372  const uchar pattern_first = (uchar)*pattern;
1373  for (const char* ptr = data + data_size; ptr > data; )
1374  if (*--ptr == pattern_first)
1375  return offset + (ulong)(ptr - data);
1376  #endif
1377  } else if (pattern_size == data_size) {
1378  // Special case for pattern and key being same size
1379  if (memcmp(data, pattern, data_size) == 0)
1380  return offset;
1381  } else {
1382  const uint8 STACK_BUF_SIZE = 128;
1383  if (pattern_size <= STACK_BUF_SIZE) {
1384  // Smaller patterns use stack table
1385  uint8 table[STACK_BUF_SIZE];
1386  return string_search_impl_kmp_reverse<uint8>(table, pattern, pattern_size, data, data_size, offset);
1387  } else {
1388  // Larger patterns must allocate a table
1389  uint* table = (uint*)::malloc(pattern_size * sizeof(uint));
1390  const ulong result = string_search_impl_kmp_reverse<uint>(table, pattern, pattern_size, data, data_size, offset);
1391  ::free(table);
1392  return result;
1393  }
1394  }
1395  }
1396  return NONE;
1397  }
1398 
1399  // string_search_reverse() variant
1400  inline ulong string_search_reverse_basic(const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset) {
1401  if (pattern_size > 0 && pattern_size <= data_size) {
1402  if (pattern_size == 1) {
1403  // Special case for single char
1404  #if !defined(EVO_NO_MEMRCHR) && defined(EVO_GLIBC_MEMRCHR)
1405  const char* ptr = (char*)memrchr(data, *pattern, data_size);
1406  if (ptr != NULL)
1407  return offset + (ulong)(ptr - data);
1408  #else
1409  const uchar pattern_first = (uchar)*pattern;
1410  for (const char* ptr = data + data_size; ptr > data; )
1411  if (*--ptr == pattern_first)
1412  return offset + (ulong)(ptr - data);
1413  #endif
1414  } else if (pattern_size == data_size) {
1415  // Special case for pattern and key being same size
1416  if (memcmp(data, pattern, data_size) == 0)
1417  return offset;
1418  } else {
1419  data_size = data_size + 1 - pattern_size; // no space for pattern after here
1420  const uchar pattern_first = (uchar)*pattern;
1421  const char* ptr;
1422  #if !defined(EVO_NO_MEMRCHR) && defined(EVO_GLIBC_MEMRCHR)
1423  for (;;) {
1424  if ((ptr = (char*)memrchr(data, pattern_first, data_size)) == NULL)
1425  break;
1426  data_size = (ulong)(ptr - data);
1427  if (memcmp(ptr, pattern, pattern_size) == 0)
1428  return offset + data_size;
1429  if (ptr == data)
1430  break;
1431  }
1432  #else
1433  ptr = data + data_size; // data_size adjusted above so there's no space for pattern after here
1434  while (ptr > data)
1435  if (*--ptr == pattern_first && memcmp(ptr, pattern, pattern_size) == 0)
1436  return offset + (ulong)(ptr - data);
1437  #endif
1438  }
1439  }
1440  return NONE;
1441  }
1442 
1443  // Select a string_search_reverse() variant dynamically
1444  inline ulong string_search_reverse(StringSearchAlg alg, const char* pattern, uint pattern_size, const char* data, ulong data_size, ulong offset) {
1445  switch (alg) {
1446  case ssaBASIC:
1447  return string_search_reverse_basic(pattern, pattern_size, data, data_size, offset);
1448  case ssaKMP:
1449  default:
1450  return string_search_reverse(pattern, pattern_size, data, data_size, offset);
1451  }
1452  }
1453 }
1456 
1458 // Implementation
1460 namespace impl {
1461  // Convert string to integer value
1462  template<class T>
1463  T tonum(const char* str, ulong size, Error& error, int base) {
1464  const char* const end = str + size;
1465  bool neg = false;
1466  typename ToUnsigned<T>::Type num = 0;
1467  uchar ch;
1468 
1469  // Prefix
1470  while (str < end && ((ch=*str) == ' ' || ch == '\t'))
1471  ++str;
1472  if (str < end) {
1473  if ((ch=*str) == '+')
1474  ++str;
1475  else if (ch == '-')
1476  { neg = true; ++str; }
1477  }
1478  if (str == end)
1479  { error = EInval; return 0; }
1480 
1481  switch (base) {
1482  case 0:
1483  // Detect base
1484  switch (*str) {
1485  case '0':
1486  if (++str < end) {
1487  switch (*str) {
1488  case 'X': // fallthrough
1489  case 'x':
1490  // Hex
1491  base = 16;
1492  if (++str == end)
1493  { error = EInval; return 0; }
1494  break;
1495  case 'O': // fallthrough
1496  case 'o':
1497  // Octal
1498  base = 8;
1499  if (++str == end)
1500  { error = EInval; return 0; }
1501  break;
1502  case 'B': // fallthrough
1503  case 'b':
1504  // Binary
1505  base = 2;
1506  if (++str == end)
1507  { error = EInval; return 0; }
1508  break;
1509  default:
1510  // Octal
1511  base = 8;
1512  }
1513  } else
1514  // Decimal
1515  base = 10;
1516  break;
1517  case 'x':
1518  // Hex
1519  base = 16;
1520  if (++str == end)
1521  { error = EInval; return 0; }
1522  break;
1523  case 'o':
1524  // Octal
1525  base = 8;
1526  if (++str == end)
1527  { error = EInval; return 0; }
1528  break;
1529  case 'b':
1530  // Binary
1531  base = 2;
1532  if (++str == end)
1533  { error = EInval; return 0; }
1534  break;
1535  default:
1536  // Decimal
1537  base = 10;
1538  break;
1539  }
1540  break;
1541  case 16:
1542  // Skip hex prefix
1543  if (*str == 'x')
1544  ++str;
1545  else if (*str == '0' && str+1 < end && (str[1] == 'x' || str[1] == 'X'))
1546  str += 2;
1547  if (str == end)
1548  { error = EInval; return 0; }
1549  break;
1550  case 8:
1551  // Skip octal prefix
1552  if (*str == 'o')
1553  ++str;
1554  else if (*str == '0' && str+1 < end && (str[1] == 'o' || str[1] == 'O'))
1555  str += 2;
1556  if (str == end)
1557  { error = EInval; return 0; }
1558  break;
1559  case 2:
1560  // Skip binary prefix
1561  if (*str == 'b')
1562  ++str;
1563  else if (*str == '0' && str+1 < end && (str[1] == 'b' || str[1] == 'B'))
1564  str += 2;
1565  if (str == end)
1566  { error = EInval; return 0; }
1567  break;
1568  default:
1569  break;
1570  }
1571 
1572  // Limits
1573  const typename ToUnsigned<T>::Type limitnum = (neg ? (IntegerT<T>::SIGN ? -(typename ToSigned<T>::Type)IntegerT<T>::MIN : IntegerT<T>::MAX) : IntegerT<T>::MAX);
1574  const typename ToUnsigned<T>::Type limitbase = limitnum / (typename ToUnsigned<T>::Type)base;
1575 
1576  #if defined(EVO_OLDCC)
1577  const T MIN = IntegerT<T>::MIN;
1578  const T MAX = IntegerT<T>::MAX;
1579  #endif
1580 
1581  // Number
1582  error = ENone;
1583  for (; str < end; ++str) {
1584  ch = *str;
1585  if (ch >= '0' && ch <= '9')
1586  ch -= '0';
1587  else if (ch >= 'A' && ch <= 'Z')
1588  ch = ch - 'A' + 10;
1589  else if (ch >= 'a' && ch <= 'z')
1590  ch = ch - 'a' + 10;
1591  else {
1592  // Not a digit
1593  if (str < end && *str == '.' && base == 10) {
1594  // Ignore fraction
1595  ++str;
1596  while (str < end && (ch=*str) >= '0' && ch <= '9')
1597  ++str;
1598  }
1599  while (str < end && ((ch=*str) == ' ' || ch == '\t'))
1600  ++str;
1601  if (str == end)
1602  break; // ending whitespace ok
1603  error = EInval;
1604  break;
1605  }
1606  if (ch >= base)
1607  { error = EInval; break; }
1608  assert( ch < base );
1609  if (num > limitbase || (num == limitbase && ch > (limitnum % base))) {
1610  error = EOutOfBounds;
1611  #if defined(EVO_OLDCC) // fixes undefined reference on older compilers
1612  return (neg ? MIN : MAX);
1613  #else
1614  return (neg ? IntegerT<T>::MIN : IntegerT<T>::MAX);
1615  #endif
1616  }
1617  num *= (typename ToUnsigned<T>::Type)base;
1618  num += ch;
1619  }
1620  return (neg ? -(typename ToSigned<T>::Type)num : num);
1621  }
1622 
1623  // Convert string to floating-point value
1624  template<class T>
1625  T tonumf(const char* str, ulong size, Error& error) {
1626  const int MAXDIGITS = FloatT<T>::MAXDIGITS + 2;
1627  const int BASE = 10;
1628  const char* end = str + size;
1629  bool neg = false;
1630  uchar ch;
1631 
1632  // Trim ending whitespace
1633  while ( str < end && ((ch=end[-1]) == ' ' || ch == '\t') )
1634  --end;
1635 
1636  // Prefix
1637  while ( str < end && ((ch=*str) == ' ' || ch == '\t') )
1638  ++str;
1639  if (str < end) {
1640  switch (*str) {
1641  case '+': ++str; break;
1642  case '-': neg = true; ++str; break;
1643  }
1644  } else {
1645  error = EInval;
1646  return 0.0;
1647  }
1648 
1649  // INF, NaN
1650  error = ENone;
1651  switch (*str) {
1652  case 'i':
1653  case 'I':
1654  if ( end-str >= 3 &&
1655  ((ch=str[1]) == 'n' || ch == 'N') &&
1656  ((ch=str[2]) == 'f' || ch == 'F') )
1657  return (neg ? -FloatT<T>::inf() : FloatT<T>::inf());
1658  break;
1659  case 'n':
1660  case 'N':
1661  if ( FloatT<T>::NANOK && end-str >= 3 &&
1662  ((ch=str[1]) == 'a' || ch == 'A') &&
1663  ((ch=str[2]) == 'n' || ch == 'N') )
1664  return FloatT<T>::nan();
1665  break;
1666  }
1667 
1668  // Working data
1669  bool found_point = false;
1670  ulongl num = 0;
1671  int exp = 0;
1672  int exp_digits = 0;
1673  int sig_digits = 0;
1674  int digits = 0;
1675 
1676  // Significant digits
1677  while (str < end) {
1678  ch = *str;
1679  if (ch >= '0' && ch <= '9')
1680  // Decimal digit
1681  ch -= '0';
1682  else if (ch == '.') {
1683  // Decimal point
1684  if (found_point)
1685  { error = EInval; return 0.0; }
1686  found_point = true;
1687  ++str;
1688  continue;
1689  } else if (ch == 'e' || ch == 'E') {
1690  // Exponent
1691  if (digits == 0)
1692  { error = EInval; break; }
1693  if (++str < end) {
1694  bool exp_neg = false;
1695  switch (*str) {
1696  case '+': ++str; break;
1697  case '-': exp_neg = true; ++str; break;
1698  }
1699  for (; str < end; ++str) {
1700  if ((ch=*str) >= '0' && ch <= '9')
1701  ch -= '0';
1702  else
1703  break;
1704  exp *= BASE;
1705  exp += ch;
1706  }
1707  if (exp_neg)
1708  exp = -exp;
1709  }
1710  break;
1711  } else if (ch == '#') {
1712  // MSVC inf/nan
1713  ++str;
1714  if ( end-str >= 3 &&
1715  ((ch=str[0]) == 'I' || ch == 'i') &&
1716  ((ch=str[1]) == 'N' || ch == 'n') &&
1717  ((ch=str[2]) == 'F' || ch == 'f') )
1718  return (neg ? -FloatT<T>::inf() : FloatT<T>::inf());
1719  return FloatT<T>::nan();
1720  } else {
1721  // Invalid character
1722  error = EInval;
1723  break;
1724  }
1725 
1726  // Don't start counting digits until first significant digit
1727  if (found_point)
1728  ++exp_digits;
1729  if (sig_digits != 0 || ch != 0) {
1730  // Apply next digit
1731  if (sig_digits > MAXDIGITS) {
1732  ++exp;
1733  } else {
1734  num *= BASE;
1735  num += ch;
1736  }
1737  ++sig_digits;
1738  }
1739  ++digits; ++str;
1740  }
1741  if (digits == 0 || str != end)
1742  error = EInval;
1743 
1744  // Result
1745  T result;
1746  if (exp > FloatT<T>::maxexp())
1747  { result = FloatT<T>::inf(); error = EOutOfBounds; }
1748  else if (exp < FloatT<T>::minexp())
1749  { result = -FloatT<T>::inf(); error = EOutOfBounds; }
1750  else
1751  result = FloatT<T>::exp10((T)num, -exp_digits+exp);
1752  if (neg)
1753  result = -result;
1754  return result;
1755  }
1756 
1757  // Convert string to bool value
1758  template<class Size>
1759  bool tobool(const char* str, Size size, Error& error) {
1760  #define EVO_TMP_GET_CHAR_TOUPPER(INDEX) ch = str[INDEX]; if (ch >= 'a') ch -= 32;
1761  #define EVO_TMP_ELSE_INVALID else { error = EInval; return false; }
1762  #define EVO_TMP_TRUE_IF_CHAR(VAL) if (ch == VAL) { error = ENone; return true; } EVO_TMP_ELSE_INVALID
1763 
1764  char ch;
1765  while (size > 0 && ( (ch=*str) == ' ' || ch == '\t' ))
1766  { ++str; --size; }
1767  while (size > 0 && ( (ch=str[size-1]) == ' ' || ch == '\t' ))
1768  --size;
1769 
1770  switch (size) {
1771  case 1:
1772  EVO_TMP_GET_CHAR_TOUPPER(0);
1773  if ( ch == 'T' || ch == 'Y' || (ch >= '1' && ch <= '9') ) {
1774  error = ENone;
1775  return true;
1776  } else if ( ch == 'F' || ch == 'N' || ch == '0' )
1777  error = ENone;
1778  else
1779  error = EInval;
1780  return false;
1781  case 2:
1782  EVO_TMP_GET_CHAR_TOUPPER(0);
1783  if (ch == 'O') {
1784  EVO_TMP_GET_CHAR_TOUPPER(1);
1785  EVO_TMP_TRUE_IF_CHAR('N');
1786  }
1787  break;
1788  case 3:
1789  EVO_TMP_GET_CHAR_TOUPPER(0);
1790  if (ch == 'O') {
1791  EVO_TMP_GET_CHAR_TOUPPER(1);
1792  if (ch == 'F') {
1793  EVO_TMP_GET_CHAR_TOUPPER(2);
1794  if (ch == 'F') {
1795  error = ENone;
1796  return false;
1797  } EVO_TMP_ELSE_INVALID;
1798  } EVO_TMP_ELSE_INVALID;
1799  } else if (ch == 'Y') {
1800  EVO_TMP_GET_CHAR_TOUPPER(1);
1801  if (ch == 'E') {
1802  EVO_TMP_GET_CHAR_TOUPPER(2);
1803  EVO_TMP_TRUE_IF_CHAR('S');
1804  } EVO_TMP_ELSE_INVALID;
1805  }
1806  break;
1807  case 4:
1808  EVO_TMP_GET_CHAR_TOUPPER(0);
1809  if (ch == 'T') {
1810  EVO_TMP_GET_CHAR_TOUPPER(1);
1811  if (ch == 'R') {
1812  EVO_TMP_GET_CHAR_TOUPPER(2);
1813  if (ch == 'U') {
1814  EVO_TMP_GET_CHAR_TOUPPER(3);
1815  EVO_TMP_TRUE_IF_CHAR('E');
1816  } EVO_TMP_ELSE_INVALID;
1817  } EVO_TMP_ELSE_INVALID;
1818  }
1819  break;
1820  case 5:
1821  EVO_TMP_GET_CHAR_TOUPPER(0);
1822  if (ch == 'F') {
1823  EVO_TMP_GET_CHAR_TOUPPER(1);
1824  if (ch == 'A') {
1825  EVO_TMP_GET_CHAR_TOUPPER(2);
1826  if (ch == 'L') {
1827  EVO_TMP_GET_CHAR_TOUPPER(3);
1828  if (ch == 'S') {
1829  EVO_TMP_GET_CHAR_TOUPPER(4);
1830  if (ch == 'E') {
1831  error = ENone;
1832  return false;
1833  } EVO_TMP_ELSE_INVALID;
1834  } EVO_TMP_ELSE_INVALID;
1835  } EVO_TMP_ELSE_INVALID;
1836  } EVO_TMP_ELSE_INVALID;
1837  }
1838  break;
1839  default:
1840  break;
1841  }
1842  return (tonum<ulong>(str, size, error, 0) != 0);
1843 
1844  #undef EVO_TMP_GET_CHAR_TOUPPER
1845  #undef EVO_TMP_ELSE_INVALID
1846  #undef EVO_TMP_TRUE_IF_CHAR
1847  }
1848 
1849  // Format signed integer as string
1850  template<class T>
1851  static ulong fnum(char* endptr, T num, int base) {
1852  assert( endptr != NULL );
1853  assert( base >= 0 );
1854 
1855  const char* digits;
1856  if (base >= 100) {
1857  assert( base <= 136 );
1858  base -= 100;
1859  digits = "0123456789abcdefghijklmnopqrstuvwxyz";
1860  } else {
1861  assert( base <= 36 );
1862  digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1863  }
1864 
1865  char* ptr = endptr;
1866  if (num == 0)
1867  *--ptr = '0';
1868  else if (num > 0) {
1869  while (num != 0) {
1870  *--ptr = digits[num % (T)base];
1871  num /= (T)base;
1872  }
1873  } else {
1874  typename ToUnsigned<T>::Type unum = (typename ToUnsigned<T>::Type)-num;
1875  while (unum != 0) {
1876  *--ptr = digits[unum % (T)base];
1877  unum /= (T)base;
1878  }
1879  *--ptr = '-';
1880  }
1881  return (ulong)(endptr - ptr);
1882  }
1883 
1884  // Format unsigned integer as string
1885  template<class T>
1886  static ulong fnumu(char* endptr, T num, int base) {
1887  assert( endptr != NULL );
1888  assert( base >= 0 );
1889 
1890  const char* digits;
1891  if (base >= 100) {
1892  assert( base <= 136 );
1893  base -= 100;
1894  digits = "0123456789abcdefghijklmnopqrstuvwxyz";
1895  } else {
1896  assert( base <= 36 );
1897  digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1898  }
1899 
1900  char* ptr = endptr;
1901  if (num == 0)
1902  *--ptr = '0';
1903  else
1904  while (num != 0) {
1905  *--ptr = digits[num % (T)base];
1906  num /= (T)base;
1907  }
1908  return (ulong)(endptr - ptr);
1909  }
1910 
// Weight floating point number so it's rounded to given precision.
// Adds 0.5 scaled down by 10^precision to the magnitude of num (away from zero).
// Zero and values with no fractional part are returned unchanged.
template<class T>
static T fnumf_weight(T num, int precision) {
    T whole_part;
    if (num == 0.0 || evo_modf(num, &whole_part) == 0.0)
        return num;

    T weight;
    if (precision >= 0 && precision <= 6) {
        // Common precisions come from a table
        const T TABLE[] = { (T)0.5, (T)0.05, (T)0.005, (T)0.0005, (T)0.00005, (T)0.000005, (T)0.0000005 };
        weight = TABLE[precision];
    } else {
        // Computed by repeated division; a negative precision leaves the weight at 0.5
        weight = 0.5;
        for (int step = 0; step < precision; ++step)
            weight /= 10.0;
    }

    return (num < 0.0 ? num - weight : num + weight);
}
1933 
1934  // Used by fnumf*() below
1935  template<class T> struct FloatNumF {
1936  static T dig_roundup() { return 0.00001f; }
1937  static T dig_roundup_limit() { return 0.1f; }
1938  };
1939  template<> struct FloatNumF<double> {
1940  static double dig_roundup() { return 0.00001; }
1941  static double dig_roundup_limit() { return 0.1; }
1942  };
1943  template<> struct FloatNumF<ldouble> {
1944  static ldouble dig_roundup() { return 0.00001L; }
1945  static ldouble dig_roundup_limit() { return 0.1L; }
1946  };
1947 
1948  // Format normalized floating point number as string using given precision
1949  template<class T>
1950  static ulong fnumf(char* ptr, T num, int exp, int precision) {
1951  const T PRECISION = FloatT<T>::precision();
1952  const char* startptr = ptr;
1953 
1954  // NaN
1955  if (FloatT<T>::nan(num)) {
1956  ptr[0] = 'n';
1957  ptr[1] = 'a';
1958  ptr[2] = 'n';
1959  return 3;
1960  }
1961 
1962  // Negative
1963  if (num < 0.0) {
1964  *ptr = '-'; ++ptr;
1965  num = -num;
1966  }
1967 
1968  // inf or -inf
1969  if (FloatT<T>::inf(num)) {
1970  ptr[0] = 'i';
1971  ptr[1] = 'n';
1972  ptr[2] = 'f';
1973  ptr += 3;
1974  return (ulong)(ptr - startptr);
1975  }
1976 
1977  // Fraction leading zeroes
1978  if (exp <= 0) {
1979  *ptr = '0'; ++ptr;
1980  if (precision <= 0)
1981  return (ulong)(ptr - startptr); // Done, ignore fraction
1982  *ptr = '.'; ++ptr;
1983  while (exp < 0 && precision > 0) {
1984  *ptr = '0'; ++ptr;
1985  ++exp; --precision;
1986  }
1987  }
1988 
1989  // This prevents some cases where formatting comes out rounded down
1990  num += FloatT<T>::eps();
1991 
1992  // Significant digits
1993  T digit_roundup = FloatNumF<T>::dig_roundup(); // Used to round out float noise on each digit
1994  int digit = 0, count = 0, digit_roundup_counter = FloatT<T>::MAXDIGITS - 1;
1995  while (num > PRECISION) {
1996  // Round out float noise more aggressively when reaching precision limit
1997  if (--digit_roundup_counter == 0)
1998  digit_roundup = FloatNumF<T>::dig_roundup_limit();
1999 
2000  // Get next digit
2001  num *= 10.0;
2002  digit = (int)(num + digit_roundup);
2003  if (digit > 9)
2004  digit = 9;
2005 
2006  // Precision check
2007  if (exp <= 0) {
2008  if (precision <= 0)
2009  break;
2010  --precision;
2011  }
2012 
2013  // Remove next digit
2014  num -= digit;
2015 
2016  // Add digit
2017  if (++count > FloatT<T>::MAXDIGITS)
2018  *ptr = '0'; // Exceeded max significant digits, use '0'
2019  else
2020  *ptr = '0' + (char)digit;
2021  ++ptr;
2022 
2023  // Decimal point
2024  if (--exp == 0) {
2025  if (precision <= 0)
2026  break; // Done, ignore fraction
2027  *ptr = '.'; ++ptr;
2028  }
2029  }
2030 
2031  // Whole number trailing zeroes
2032  if (exp > 0) {
2033  do {
2034  *ptr = '0'; ++ptr;
2035  } while (--exp > 0);
2036  if (precision > 0)
2037  { *ptr = '.'; ++ptr; }
2038  }
2039 
2040  // Fraction trailing zeroes
2041  while (precision > 0) {
2042  *ptr = '0'; ++ptr;
2043  --precision;
2044  }
2045 
2046  // Done
2047  return (ulong)(ptr - startptr);
2048  }
2049 
2050  // Format normalized floating point number as string using normal or e notation
2051  template<class T>
2052  static ulong fnumfe(char* ptr, T num, int exp, bool cap) {
2053  const T PRECISION = FloatT<T>::precision();
2054  const char* startptr = ptr;
2055 
2056  // NaN
2057  if (FloatT<T>::nan(num)) {
2058  ptr[0] = 'n';
2059  ptr[1] = 'a';
2060  ptr[2] = 'n';
2061  return 3;
2062  }
2063 
2064  // Zero
2065  if (num == 0.0) {
2066  *ptr = '0';
2067  return 1;
2068  }
2069 
2070  // Negative
2071  if (num < 0.0) {
2072  *ptr = '-'; ++ptr;
2073  num = -num;
2074  }
2075 
2076  // inf or -inf
2077  if (FloatT<T>::inf(num)) {
2078  ptr[0] = 'i';
2079  ptr[1] = 'n';
2080  ptr[2] = 'f';
2081  ptr += 3;
2082  return (ulong)(ptr - startptr);
2083  }
2084 
2085  // Adjust for exponent notation
2086  const int E_THRESHOLD = (FloatT<T>::MAXDIGITS > 6 ? 6 : FloatT<T>::MAXDIGITS);
2087  bool on_fraction = false;
2088  bool show_e = false;
2089  if (exp < -2 || exp > E_THRESHOLD) {
2090  show_e = true;
2091  --exp;
2092  } else {
2093  // Fraction leading zeroes
2094  if (exp <= 0) {
2095  on_fraction = true;
2096  ptr[0] = '0';
2097  ptr[1] = '.';
2098  ptr += 2;
2099  while (exp < 0)
2100  { *ptr = '0'; ++ptr; ++exp; }
2101  }
2102  }
2103 
2104  // This prevents some cases where formatting comes out rounded down
2105  num += FloatT<T>::eps();
2106 
2107  // Significant digits
2108  T digit_roundup = FloatNumF<T>::dig_roundup(); // Used to round out float noise on each digit
2109  int digit = 0, count = 0, zero_count = 0, digit_roundup_counter = FloatT<T>::MAXDIGITS - 1;
2110  while (num > PRECISION) {
2111  // Round out float noise more aggressively when reaching precision limit
2112  if (--digit_roundup_counter == 0)
2113  digit_roundup = FloatNumF<T>::dig_roundup_limit();
2114 
2115  // Get next digit
2116  num *= 10.0;
2117  digit = (int)(num + digit_roundup);
2118  if (digit > 9)
2119  digit = 9;
2120 
2121  // Remove next digit
2122  num -= (T)digit;
2123 
2124  // Add digit
2125  if (++count > FloatT<T>::MAXDIGITS)
2126  break;
2127  *ptr = '0' + (char)digit;
2128  ++ptr;
2129 
2130  // Decimal point
2131  if (show_e) {
2132  // E notation has decimal point after 1st digit
2133  if (count == 1) {
2134  on_fraction = true;
2135  *ptr = '.';
2136  ++ptr;
2137  }
2138  } else if (--exp == 0) {
2139  // Normal notation decimal
2140  on_fraction = true;
2141  *ptr = '.';
2142  ++ptr;
2143  }
2144 
2145  // Track ending zeros for trimming
2146  if (on_fraction) {
2147  if (digit == 0)
2148  ++zero_count;
2149  else if (zero_count > 0)
2150  zero_count = 0;
2151  }
2152  }
2153 
2154  // Remove unnecessary ending zeros
2155  if (zero_count > 0)
2156  ptr -= zero_count;
2157  if (ptr[-1] == '.')
2158  --ptr;
2159 
2160  // Additional values
2161  if (show_e) {
2162  // Exponent
2163  if (exp != 0) {
2164  *ptr = (cap?'E':'e'); ++ptr;
2165  if (exp < 0)
2166  { *ptr = '-'; ++ptr; exp = -exp; }
2167  else
2168  { *ptr = '+'; ++ptr; }
2169  if (exp >= 100) {
2170  ptr[0] = '0' + ((exp / 100) % 10);
2171  ptr[1] = '0' + ((exp / 10) % 10);
2172  ptr[2] = '0' + (exp % 10);
2173  ptr += 3;
2174  } else if (exp >= 10) {
2175  ptr[0] = '0' + ((exp / 10) % 10);
2176  ptr[1] = '0' + (exp % 10);
2177  ptr += 2;
2178  } else
2179  { *ptr = '0' + (char)exp; ++ptr; }
2180  }
2181  } else {
2182  // Whole number trailing zeroes
2183  if (exp > 0) {
2184  do {
2185  *ptr = '0'; ++ptr;
2186  } while (--exp > 0);
2187  }
2188  }
2189 
2190  // Done
2191  return (ulong)(ptr - startptr);
2192  }
2193 
2194 #if defined(EVO_FNUMF_SPRINTF)
2195  static const uint FNUMF_SPRINTF_BUF_SIZE = UInt::MAXSTRLEN + 6;
2196 
2197  static ulong fnumf_sprintf_setup(char*& fmt, char* buf, double num, int precision) {
2198  if (precision < 0) {
2199  fmt = buf;
2200  fmt[0] = '%';
2201  fmt[1] = 'g';
2202  fmt[2] = '\0';
2203  } else {
2204  fmt = buf + FNUMF_SPRINTF_BUF_SIZE - 1;
2205  *--fmt = '\0';
2206  *--fmt = 'f';
2207  fmt -= fnumu(fmt, (uint)(precision < 0 ? 0 : precision), 10);
2208  *--fmt = '.';
2209  *--fmt = '%';
2210  }
2211  int result = ::snprintf(NULL, 0, fmt, num);
2212  return (ulong)(result < 0 ? 0 : result);
2213  }
2214 
2215  static ulong fnumf_sprintf_setup(char*& fmt, char* buf, float num, int precision) {
2216  return fnumf_sprintf_setup(fmt, buf, (double)num, precision);
2217  }
2218 
2219  static ulong fnumf_sprintf_setup(char*& fmt, char* buf, ldouble num, int precision) {
2220  if (precision < 0) {
2221  fmt = buf;
2222  fmt[0] = '%';
2223  fmt[1] = 'L';
2224  fmt[2] = 'g';
2225  fmt[3] = '\0';
2226  } else {
2227  fmt = buf + FNUMF_SPRINTF_BUF_SIZE - 1;
2228  *--fmt = '\0';
2229  *--fmt = 'f';
2230  *--fmt = 'L';
2231  fmt -= fnumu(fmt, (uint)(precision < 0 ? 0 : precision), 10);
2232  *--fmt = '.';
2233  *--fmt = '%';
2234  }
2235  int result = ::snprintf(NULL, 0, fmt, num);
2236  return (ulong)(result < 0 ? 0 : result);
2237  }
2238 #endif
2239 
2240  // Conversion helpers
2241  template<class T> struct ToBool {
2242  template<class Size> static T getbool(const char* data, Size size) {
2243  STATIC_ASSERT( IsBool<typename T::Type>::value, ToBool_Bool_Type_Required );
2244  T result;
2245  if (size > 0) {
2246  Error error;
2247  typename T::Type value = impl::tobool(data, size, error);
2248  if (error == ENone)
2249  result = value;
2250  }
2251  return result;
2252  }
2253  };
2254  template<class T>struct ToBoolPod {
2255  template<class Size> static T getbool(const char* data, Size size) {
2256  STATIC_ASSERT( IsBool<T>::value && IsPodType<T>::value, ToBool_POD_Type_Required );
2257  Error error;
2258  T result = impl::tobool(data, size, error);
2259  if (error != ENone)
2260  result = 0;
2261  return result;
2262  }
2263  };
2264  template<class T> struct ToNum {
2265  template<class Size> static T getnum(const char* data, Size size, int base) {
2266  STATIC_ASSERT( IsInt<typename T::Type>::value, ToNum_IntegerT_Type_Required );
2267  T result;
2268  if (size > 0) {
2269  Error error;
2270  typename T::Type value = impl::tonum<typename T::Type>(data, size, error, base);
2271  if (error == ENone)
2272  result = value;
2273  }
2274  return result;
2275  }
2276  };
2277  template<class T>struct ToNumPod {
2278  template<class Size> static T getnum(const char* data, Size size, int base) {
2279  STATIC_ASSERT( IsInt<T>::value && IsPodType<T>::value, ToNum_POD_Type_Required );
2280  Error error;
2281  T result = impl::tonum<T>(data, size, error, base);
2282  if (error != ENone)
2283  result = 0;
2284  return result;
2285  }
2286  };
2287  template<class T> struct ToNumf {
2288  template<class Size> static T getnum(const char* data, Size size) {
2289  STATIC_ASSERT( IsFloat<typename T::Type>::value, ToNumf_FloatT_Type_Required );
2290  T result;
2291  if (size > 0) {
2292  Error error;
2293  typename T::Type value = impl::tonumf<typename T::Type>(data, size, error);
2294  if (error == ENone)
2295  result = value;
2296  }
2297  return result;
2298  }
2299  };
2300  template<class T> struct ToNumfPod {
2301  template<class Size> static T getnum(const char* data, Size size) {
2302  STATIC_ASSERT( IsFloat<T>::value, ToNumf_POD_Type_Required );
2303  Error error;
2304  T result = impl::tonumf<T>(data, size, error);
2305  if (error != ENone)
2306  result = 0;
2307  return result;
2308  }
2309  };
2310 }
2313 
2318 enum FmtBase {
2320  fbAUTO = 0,
2321  fBIN = 2,
2322  fOCT = 8,
2323  fDEC = 10,
2324  fHEX = 16,
2325  fHEXL = 116
2326 };
2327 
2336 };
2337 
2342  fpCURRENT = -2,
2343  fPREC_AUTO = -1,
2344  fPREC0 = 0,
2351 };
2352 
2356 enum FmtAlign {
2361 };
2362 
2367 enum FmtWidth {
2369 };
2370 
2372 
2406 struct FmtSetNull {
2407  const char* str;
2409 
2411  FmtSetNull() : str(NULL), size(0)
2412  { }
2413 
2417  FmtSetNull(const char* null) : str(null), size((StrSizeT)strlen(null))
2418  { }
2419 
2424  FmtSetNull(const char* null, StrSizeT size) : str(null), size(size)
2425  { }
2426 
2430  FmtSetNull(const ListBase<char,StrSizeT>& null) : str(null.data_), size(null.size_)
2431  { }
2432 
2437  str = NULL;
2438  size = 0;
2439  return *this;
2440  }
2441 };
2442 
2481 struct FmtSetField {
2483  int width;
2484  char fill;
2485 
2491  FmtSetField(FmtAlign align=faCURRENT, int width=-1, char fill=0) : align(align), width(width), fill(fill)
2492  { }
2493 
2498  FmtSetField(int width, char fill=0) : align(faCURRENT), width(width), fill(fill)
2499  { }
2500 
2503  align = fLEFT;
2504  width = 0;
2505  fill = ' ';
2506  return *this;
2507  }
2508 
2514  void merge(const FmtSetField& src) {
2515  if (src.align > faCURRENT)
2516  align = src.align;
2517  if (src.width >= 0)
2518  width = src.width;
2519  if (src.fill != 0)
2520  fill = src.fill;
2521  }
2522 
2529  static void setup_align(int& align_padleft, int& align_padright, int align_padding, const FmtSetField* field) {
2530  if (align_padding > 0) {
2531  switch (field->align) {
2532  default: // fallthrough
2533  case faCURRENT: // fallthrough
2534  case fLEFT:
2535  align_padleft = 0;
2536  align_padright = align_padding;
2537  break;
2538  case fCENTER:
2539  align_padleft = (align_padding / 2);
2540  align_padright = align_padding - align_padleft;
2541  break;
2542  case fRIGHT:
2543  align_padleft = align_padding;
2544  align_padright = 0;
2545  break;
2546  }
2547  } else
2548  align_padleft = align_padright = 0;
2549  }
2550 };
2551 
2589 struct FmtSetInt {
2590  int base;
2593  char pad_ch;
2594 
2601  FmtSetInt(int base=fbCURRENT, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0) : base(base), prefix(prefix), pad_width(width), pad_ch(ch)
2602  { }
2603 
2609  FmtSetInt(int base, int width, char ch=0) : base(base), prefix(fbpCURRENT), pad_width(width), pad_ch(ch)
2610  { }
2611 
2614  base = fDEC;
2615  prefix = fPREFIX0;
2616  pad_width = 0;
2617  pad_ch = '0';
2618  return *this;
2619  }
2620 
2626  void merge(const FmtSetInt& src) {
2627  if (src.base > fbCURRENT)
2628  base = src.base;
2629  if (src.prefix > fbpCURRENT)
2630  prefix = src.prefix;
2631  if (src.pad_width >= 0)
2632  pad_width = src.pad_width;
2633  if (src.pad_ch != 0)
2634  pad_ch = src.pad_ch;
2635  }
2636 
2637  // Internal helpers
2639  void impl_prefix_info(char& prefix_ch, uint& prefix_len) const {
2640  switch (prefix) {
2641  case fPREFIX1:
2642  switch (base) {
2643  case fHEXL: // fallthrough
2644  case fHEX:
2645  prefix_len = 1;
2646  prefix_ch = 'x';
2647  break;
2648  case fOCT:
2649  prefix_len = 1;
2650  prefix_ch = 'o';
2651  break;
2652  case fBIN:
2653  prefix_len = 1;
2654  prefix_ch = 'b';
2655  break;
2656  default:
2657  break;
2658  }
2659  break;
2660  case fPREFIX2:
2661  switch (base) {
2662  case fHEXL: // fallthrough
2663  case fHEX:
2664  prefix_len = 2;
2665  prefix_ch = 'x';
2666  break;
2667  case fOCT:
2668  prefix_len = 2;
2669  prefix_ch = 'o';
2670  break;
2671  case fBIN:
2672  prefix_len = 2;
2673  prefix_ch = 'b';
2674  break;
2675  default:
2676  break;
2677  }
2678  break;
2679  default:
2680  break;
2681  }
2682  }
2683  static void impl_prefix_write(char*& p, char prefix_ch, uint prefix_len) {
2684  switch (prefix_len) {
2685  case 1:
2686  *p = prefix_ch;
2687  ++p;
2688  break;
2689  case 2:
2690  *p = '0';
2691  p[1] = prefix_ch;
2692  p += 2;
2693  break;
2694  default:
2695  break;
2696  }
2697  }
2698  template<class T>
2699  void impl_num_write(char* p, T num, int digits, int width, int align_padding, const FmtSetField* field) const {
2700  int align_padleft, align_padright;
2701  FmtSetField::setup_align(align_padleft, align_padright, align_padding, field);
2702 
2703  if (align_padleft > 0) {
2704  memset(p, (int)(uchar)field->fill, align_padleft);
2705  p += align_padleft;
2706  }
2707 
2708  if (num < 0) {
2709  if (digits < width) {
2710  *p = '-';
2711  ++p;
2712 
2713  char* p0 = p;
2714  const uint padlen = width - digits;
2715  p += padlen + digits - 1;
2716  impl::fnum(p, num, fDEC);
2717 
2718  const int ch = (pad_ch == 0 ? '0' : (int)(uchar)pad_ch);
2719  memset(p0, ch, padlen);
2720  } else {
2721  p += digits;
2722  impl::fnum(p, num, fDEC);
2723  }
2724  } else {
2725  if (digits < width) {
2726  const uint padlen = width - digits;
2727  const int ch = (pad_ch == 0 ? '0' : (int)(uchar)pad_ch);
2728  memset(p, ch, padlen);
2729  p += padlen;
2730  }
2731  p += digits;
2732  impl::fnum(p, num, fDEC);
2733  }
2734 
2735  if (align_padright > 0)
2736  memset(p, (int)(uchar)field->fill, align_padright);
2737  }
2739 };
2740 
2775 struct FmtSetFloat {
2778  char pad_ch;
2779  char pad_ch_sp;
2780 
2787  FmtSetFloat(int precision=fpCURRENT, int width=-1, char ch=0, char ch_sp=0) : precision(precision), pad_width(width), pad_ch(ch), pad_ch_sp(ch_sp)
2788  { }
2789 
2794  precision = fPREC_AUTO;
2795  pad_width = 0;
2796  pad_ch = '0';
2797  pad_ch_sp = ' ';
2798  return *this;
2799  }
2800 
2806  void merge(const FmtSetFloat& src) {
2807  if (src.precision > fpCURRENT)
2808  precision = src.precision;
2809  if (src.pad_width >= 0)
2810  pad_width = src.pad_width;
2811  if (src.pad_ch != 0)
2812  pad_ch = src.pad_ch;
2813  if (src.pad_ch_sp != 0)
2814  pad_ch_sp = src.pad_ch_sp;
2815  }
2816 
2817  // Internal helpers
2819  template<class T>
2820  void impl_info(T& num, int& exp, int& maxlen, int align_width) const {
2821  if (precision < 0) {
2822  num = FloatT<T>::fexp10(exp, num);
2823  maxlen = FloatT<T>::MAXDIGITS_AUTO + 1; // add 1 for sign
2824  } else {
2825  num = FloatT<T>::fexp10(exp, impl::fnumf_weight(num, precision));
2826  maxlen = FloatT<T>::maxdigits_prec(exp, precision) + 1; // add 1 for sign
2827  }
2828  if (pad_width > maxlen)
2829  maxlen = pad_width;
2830  if (align_width > maxlen)
2831  maxlen = align_width;
2832  }
// Internal helper: format num into buf, then apply number padding (pad_width with
// pad_ch / pad_ch_sp) and field alignment fill (field->fill).
// - buf: destination buffer. NOTE(review): no buffer size is passed, so the caller
//   presumably sizes it from the maxlen computed by impl_info() -- confirm
// - num, exp: number and exponent handed to impl::fnumfe() / impl::fnumf()
// - align_width: field width used to compute the alignment padding
// - field: alignment attributes given to FmtSetField::setup_align(); field->fill is
//   only dereferenced here when left or right alignment padding is > 0
// Returns the total number of chars written to buf.
2833  template<class T>
2834  ulong impl_write(char* buf, T num, int exp, int align_width, const FmtSetField* field) const {
2835  char* p = buf;
2836  long len;
      // The number is first formatted at the very start of buf; len is its length
2837  if (precision < 0)
2838  len = (long)impl::fnumfe(p, num, exp, false);
2839  else
2840  len = (long)impl::fnumf(p, num, exp, precision);
2841 
      // Width of the number once padded, and what is left over for field alignment
2842  const int width = (pad_width > len ? pad_width : len);
2843  const int align_padding = (align_width > width ? align_width - width : 0);
2844 
2845  int align_padleft, align_padright;
2846  FmtSetField::setup_align(align_padleft, align_padright, align_padding, field);
2847 
      // p1 is where the (padded) number must start after any left alignment fill
2848  char* p1 = p + align_padleft;
2849  if (len < pad_width) {
2850  const int padlen = pad_width - len;
      // memmove() is required since source and destination ranges can overlap;
      // in each branch the text is moved first, then the vacated gap is filled
2851  if (FloatT<T>::nan(num) || FloatT<T>::inf(num)) {
      // Special value: pad on the left using pad_ch_sp
2852  memmove(p1 + padlen, p, len);
2853  memset(p1, (int)(uchar)pad_ch_sp, padlen);
2854  } else if (num < 0.0) {
      // Negative: padding goes between the '-' sign and the digits
2855  memmove(p1 + 1 + padlen, p+1, len-1);
2856  memset(p1 + 1, (int)(uchar)pad_ch, padlen);
2857  *p1 = '-';
2858  } else {
      // Non-negative: pad on the left using pad_ch
2859  memmove(p1 + padlen, p, len);
2860  memset(p1, (int)(uchar)pad_ch, padlen);
2861  }
      // Left alignment fill is written last since it overwrites the start of buf
2862  if (align_padleft > 0)
2863  memset(p, (int)(uchar)field->fill, align_padleft);
2864  p = p1 + len + padlen;
2865  } else if (align_padleft > 0) {
      // No number padding needed: shift the number right and fill the gap
2866  memmove(p1, p, len);
2867  memset(p, (int)(uchar)field->fill, align_padleft);
2868  p = p1 + len;
2869  } else
2870  p += len;
2871 
      // p now points just past the number; append any right alignment fill
2872  if (align_padright > 0) {
2873  memset(p, (int)(uchar)field->fill, align_padright);
2874  p += align_padright;
2875  }
2876 
2877  return (ulong)(p - buf);
2878  }
2880 };
2881 
// Formatting attributes (used internally).
// NOTE: this listing omits the member declarations (str.h:2887-2890) and the
// reset() signature (str.h:2899). Per the index at the end of this listing the members
// are: FmtSetNull null, FmtSetInt num_int, FmtSetFloat num_flt, FmtSetField field.
2886 struct FmtAttribs {
2891 
      // Constructor: sets explicit defaults for num_int, num_flt, and field;
      // null is not in the initializer list
2893  FmtAttribs() : num_int(fbAUTO, fPREFIX0, 0, '0'), num_flt(fPREC_AUTO, 0, '0', ' '), field(fLEFT, 0, ' ')
2894  { }
2895 
      // Body of reset() (index: "FmtAttribs & reset()" -- Reset to defaults):
      // calls reset() on each member and returns *this
2900  null.reset();
2901  num_int.reset();
2902  num_flt.reset();
2903  field.reset();
2904  return *this;
2905  }
2906 };
2907 
2909 
// Explicitly format a repeated character.
2914 struct FmtChar {
2915  char ch;        // Character to format
2916  uint count;     // Character repeat count
2917 
      // Constructor: stores the character and repeat count.
      // NOTE(review): count is received as ulong but stored as uint, so a value that
      // doesn't fit in uint would be narrowed -- confirm this is intended
2922  FmtChar(char ch, ulong count) : ch(ch), count(count)
2923  { }
2924 };
2925 
// Explicitly format a string: pairs a string reference (str) with field formatting
// attributes (fmt).
// NOTE: this listing omits some declarations. Per the index at the end of this listing:
// typedef StringBase (str.h:2933), typedef FmtFieldType (str.h:2934), and the member
// FmtSetField fmt (str.h:2937).
2931 struct FmtString {
2932  typedef FmtString This;
2935 
2936  StringBase str;
2938 
      // Constructor from a string pointer passed to StringBase (no size given, so
      // presumably terminated -- confirm) with alignment only
2943  FmtString(const char* str, FmtAlign align=fLEFT) : str(str), fmt(align)
2944  { }
2945 
      // Constructor from string pointer and size, with field width and fill char
2952  FmtString(const char* str, StrSizeT size, int width=-1, char ch=0) : str(str, size), fmt(width, ch)
2953  { }
2954 
      // Constructor from string pointer and size, with alignment, field width, and fill char
2962  FmtString(const char* str, StrSizeT size, FmtAlign align, int width=-1, char ch=0) : str(str, size), fmt(align, width, ch)
2963  { }
2964 
      // Constructor from StringBase, with field width and fill char
2970  FmtString(const StringBase& str, int width=-1, char ch=0) : str(str), fmt(width, ch)
2971  { }
2972 
      // Constructor from StringBase, with alignment, field width, and fill char
2979  FmtString(const StringBase& str, FmtAlign align, int width=-1, char ch=0) : str(str), fmt(align, width, ch)
2980  { }
2981 
      // Constructor with override fields for compatibility with FmtFieldType:
      // takes the string from another FmtString but uses the given align/width/ch
      // rather than the source's fmt
2988  FmtString(const FmtString& str, FmtAlign align, int width, char ch=0) : str(str.str), fmt(align, width, ch)
2989  { }
2990 
      // Helper for setting padding attributes: assigns fmt.width and fmt.fill
      // (fill is always assigned, so the default ch=0 overwrites any earlier fill)
      // and returns *this for chaining
2996  This& width(int width, char ch=0) {
2997  fmt.width = width;
2998  fmt.fill = ch;
2999  return *this;
3000  }
3001 };
3002 
3005 
// Members of FmtStringWrap.
// NOTE: this listing omits the struct header (str.h:3003), the StringBase typedef
// (str.h:3004), the member "NewlineValue newline" (str.h:3009), and both set_newline()
// signatures (str.h:3022 and str.h:3027); see the index at the end of this listing.
3006  StringBase str;
3007  int width;
3008  int indent;
3010 
      // Constructor from string pointer and size, with width and optional indent;
      // newline is not in the initializer list
3011  FmtStringWrap(const char* str, StrSizeT size, int width, int indent=0) : str(str, size), width(width), indent(indent)
3012  { }
3013 
      // Constructor from StringBase and width; indent is set to 0
3014  FmtStringWrap(const StringBase& str, int width) : str(str), width(width), indent(0)
3015  { }
3016 
      // Set indent and return *this for chaining
3017  FmtStringWrap& set_indent(int new_indent=0) {
3018  indent = new_indent;
3019  return *this;
3020  }
3021 
      // Body of set_newline(Newline nl): store nl and return *this for chaining
3023  newline = nl;
3024  return *this;
3025  }
3026 
      // Body of set_newline(NewlineDefault nl): store nl and return *this for chaining
3028  newline = nl;
3029  return *this;
3030  }
3031 };
3032 
3033 // Implemented below
3034 template<class T> struct FmtFieldNum;
3035 template<class T> struct FmtFieldFloat;
3036 
// Explicitly format an integer: pairs a number (num) with integer formatting
// attributes (fmt).
// NOTE: this listing omits typedef FmtFieldType (str.h:3069) and the member
// "FmtSetInt fmt" (str.h:3072); see the index at the end of this listing.
3064 template<class T>
3065 struct FmtIntT {
3066  typedef FmtIntT<T> This;                      // This type
3067  typedef typename IntegerT<T>::This IntClass;  // Number class type
3068  typedef T IntPod;                             // Number POD type
3070 
3071  T num;                                        // Number to format
3073 
      // Constructor for POD number type with all attributes.
      // The defaults (fbCURRENT, fbpCURRENT, -1, 0) are the "current/unspecified"
      // values per the index descriptions of FmtBase, FmtBasePrefix, and FmtSetInt
3081  FmtIntT(T num, int base=fbCURRENT, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0) : num(num), fmt(base, prefix, width, ch)
3082  { }
3083 
      // Constructor for class number type (Int, etc) with all attributes;
      // stores num.value()
3091  FmtIntT(const IntClass& num, int base=fbCURRENT, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0) : num(num.value()), fmt(base, prefix, width, ch)
3092  { }
3093 
      // Helper for setting padding attributes: assigns fmt.pad_width and fmt.pad_ch,
      // and returns *this for chaining
3099  This& width(int width, char ch=0) {
3100  fmt.pad_width = width;
3101  fmt.pad_ch = ch;
3102  return *this;
3103  }
3104 };
3105 
3114 
3123 
// Explicitly format a floating point number: pairs a number (num) and null flag with
// floating point formatting attributes (fmt).
// NOTE: this listing omits typedef FmtFieldType (str.h:3152) and the member
// "FmtSetFloat fmt" (str.h:3156); see the index at the end of this listing.
3147 template<class T>
3148 struct FmtFloatT {
3149  typedef FmtFloatT<T> This;                      // This type
3150  typedef typename FloatT<T>::This FloatClass;    // Number class type
3151  typedef T FloatPod;                             // Number POD type
3153 
3154  bool null;    // Set false by the POD constructor, or from num.null() by the class constructor
3155  T num;        // Number to format
3157 
      // Constructor for POD number type with all attributes; null is set to false
3165  FmtFloatT(T num, int precision=fpCURRENT, int width=-1, char ch='0', char ch_sp=' ') : null(false), num(num), fmt(precision, width, ch, ch_sp)
3166  { }
3167 
      // Constructor for class number type (Float, etc) with all attributes;
      // copies num.null() and num.value()
3175  FmtFloatT(const FloatClass& num, int precision=fpCURRENT, int width=-1, char ch='0', char ch_sp=' ') : null(num.null()), num(num.value()), fmt(precision, width, ch, ch_sp)
3176  { }
3177 
      // Helper for setting padding attributes: assigns fmt.pad_width, fmt.pad_ch, and
      // fmt.pad_ch_sp, and returns *this for chaining.
      // Note the fill defaults here are 0 (unlike the constructors, which default to '0' and ' ')
3184  This& width(int width, char ch=0, char ch_sp=0) {
3185  fmt.pad_width = width;
3186  fmt.pad_ch = ch;
3187  fmt.pad_ch_sp = ch_sp;
3188  return *this;
3189  }
3190 };
3191 
3195 
3197 
// This pairs a FmtIntT type with FmtSetField for output formatting.
// NOTE: this listing omits typedef This (str.h:3202) and the members
// "FmtIntT<T> num" (str.h:3203) and "FmtSetField field" (str.h:3204); see the index at
// the end of this listing.
3200 template<class T>
3201 struct FmtFieldNum {
3205 
      // Constructor: copies the number formatting and builds field from align/width/fill
3206  FmtFieldNum(const FmtIntT<T>& num, FmtAlign align=fLEFT, int width=0, char fill=' ') : num(num), field(align, width, fill) {
3207  }
3208 
      // Copy constructor: member-wise copy of num and field
3209  FmtFieldNum(const This& src) : num(src.num), field(src.field) {
3210  }
3211 
      // Assignment: member-wise copy of num and field, returns *this
3212  FmtFieldNum& operator=(const This& src) {
3213  num = src.num;
3214  field = src.field;
3215  return *this;
3216  }
3217 };
3218 
// This pairs a FmtFloatT type with FmtSetField for output formatting.
// NOTE: this listing omits typedef This (str.h:3223) and the members
// "FmtFloatT<T> num" (str.h:3224) and "FmtSetField field" (str.h:3225); see the index
// at the end of this listing.
3221 template<class T>
3222 struct FmtFieldFloat {
3226 
      // Constructor: copies the number formatting and builds field from align/width/fill
3227  FmtFieldFloat(const FmtFloatT<T>& num, FmtAlign align=fLEFT, int width=0, char fill=' ') : num(num), field(align, width, fill) {
3228  }
3229 
      // Copy constructor: member-wise copy of num and field
3230  FmtFieldFloat(const This& src) : num(src.num), field(src.field) {
3231  }
3232 
      // Assignment: member-wise copy of num and field, returns *this
3233  This& operator=(const This& src) {
3234  num = src.num;
3235  field = src.field;
3236  return *this;
3237  }
3238 };
3239 
3241 
// Explicitly format a pointer.
// NOTE: this listing omits the member "FmtSetInt fmt" (formatting attributes,
// str.h:3266); see the index at the end of this listing.
3264 struct FmtPtr {
3265  const void* ptr;    // Pointer to format
3267 
      // Constructor for formatting a pointer, always using base fHEX
3274  FmtPtr(const void* ptr, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0) : ptr(ptr), fmt(fHEX, prefix, width, ch) {
3275  }
3276 
      // Constructor for formatting a pointer with a caller-supplied base
3284  FmtPtr(const void* ptr, int base, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0) : ptr(ptr), fmt(base, prefix, width, ch) {
3285  }
3286 };
3287 
3289 
// Explicitly format a hex dump from buffer.
// Holds only the dump parameters; the buffer pointer is stored as given (not copied).
3294 struct FmtDump {
3295  const void* buf;    // Buffer to dump
3296  ulong size;         // Buffer size in bytes to dump
3297  uint maxline;       // Maximum bytes per line to dump, 0 for none (all 1 line)
3298  bool compact;       // Whether to use compact mode (no address or ASCII output)
3299  bool upper;         // Whether to use uppercase hex, false for lowercase
3300 
      // Constructor: stores all parameters as given
      // (defaults: 24 bytes per line, non-compact, lowercase hex)
3308  FmtDump(const void* buf, ulong size, uint maxline=24, bool compact=false, bool upper=false) : buf(buf), size(size), maxline(maxline), compact(compact), upper(upper)
3309  { }
3310 };
3311 
3313 
3314 }
3315 #if defined(_MSC_VER)
3316  #pragma warning(pop)
3317 #endif
3318 #endif
FmtIntT< int8 > FmtInt8
Explicitly format an integer.
Definition: str.h:3110
ListBase< char, StrSizeT > StringBase
Definition: str.h:3004
bool compact
Whether to use compact mode (no address or ASCII output)
Definition: str.h:3298
char ascii_tolower(char ch)
Convert ASCII character to lowercase.
Definition: str.h:119
ulong utf8_to16(const char *&str, const char *end, wchar16 *outbuf=NULL, ulong outsize=0, UtfMode mode=umREPLACE_INVALID)
Convert UTF-8 string to UTF-16 string.
Definition: str.h:543
CharBreakType ascii_breaktype(char ch)
Get ASCII character word-break type.
Definition: str.h:91
This & operator=(const This &src)
Definition: str.h:3233
Basic search using memchr() and memcmp()
Definition: str.h:1167
FmtSetField & reset()
Reset attributes to defaults (not unspecified).
Definition: str.h:2502
ulong size
Buffer size in bytes to dump.
Definition: str.h:3296
static T inf()
Get infinity value.
Definition: type.h:1346
This & width(int width, char ch=0)
Helper for setting padding attributes.
Definition: str.h:2996
FmtFloatT< float > FmtFloat
Explicitly format a floating point number.
Definition: str.h:3192
Out of bounds error.
Definition: sys.h:1129
Holds integer formatting attributes.
Definition: str.h:2589
This & width(int width, char ch=0)
Helper for setting padding attributes.
Definition: str.h:3099
1-digit floating point precision
Definition: str.h:2345
Holds a Newline value that can be null, which implicitly converts to NL_SYS (system default newline)...
Definition: sys.h:813
char pad_ch
Fill character, 0 if unspecified (use baseline or default) (default: '0')
Definition: str.h:2593
FmtFloatT(const FloatClass &num, int precision=fpCURRENT, int width=-1, char ch='0', char ch_sp=' ')
Constructor for class number type (Float, etc) with all attributes.
Definition: str.h:3175
This pairs a FmtIntT type with FmtSetField for output formatting.
Definition: str.h:3034
FmtString(const StringBase &str, int width=-1, char ch=0)
Constructor.
Definition: str.h:2970
StringBase str
Definition: str.h:3006
int utf8_compare(const char *str1, ulong len1, const char *str2, ulong len2)
Compare two non-terminated UTF-8 strings.
Definition: str.h:305
CharType ascii_type(char ch)
Get ASCII character type.
Definition: str.h:76
Invalid argument or data.
Definition: sys.h:1123
bool upper
Whether to use uppercase hex, false for lowercase.
Definition: str.h:3299
FmtDump(const void *buf, ulong size, uint maxline=24, bool compact=false, bool upper=false)
Explicitly format a hex dump of given buffer.
Definition: str.h:3308
FmtIntT< int16 > FmtInt16
Explicitly format an integer.
Definition: str.h:3111
Punctuation used after a word ends, break words after this ( )]}>!%;,./ )
Definition: str.h:64
Base 16: hexadecimal.
Definition: str.h:2324
FmtSetField field
Definition: str.h:3204
Formatting attributes (used internally).
Definition: str.h:2886
static T nan()
Get Not-A-Number (NaN) value.
Definition: type.h:1369
Single character base prefix (x for hex, o for octal, b for binary)
Definition: str.h:2334
FmtFieldFloat< T > This
Definition: str.h:3223
FmtIntT< uint > FmtUInt
Explicitly format an integer.
Definition: str.h:3116
Definition: str.h:2368
Word character (A-Z, a-z, 0-9, _)
Definition: str.h:65
Automatic floating point precision – either normal decimal notation or E notation, whichever is shorter (default)
Definition: str.h:2343
Numeric digit character (0-9)
Definition: str.h:53
Evo implementation detail for system portability – this is included by most Evo headers, include this via: #include <evo/type.h>.
FmtIntT< uint8 > FmtUInt8
Explicitly format an integer.
Definition: str.h:3119
Holds string to use when formatting null values.
Definition: str.h:2406
Whitespace character, used between words.
Definition: str.h:61
CharBreakType
Character break type returned by ascii_breaktype().
Definition: str.h:59
FloatT< T >::This FloatClass
Number class type.
Definition: str.h:3150
5-digit floating point precision
Definition: str.h:2349
char fill
Field fill character to pad up to width (default: ' ')
Definition: str.h:2484
No floating point precision (whole numbers)
Definition: str.h:2344
int pad_width
Width to fill to, 0 for none, -1 to ignore (leave current width) (default: 0)
Definition: str.h:2777
ulong utf16_strlen(const wchar16 *str)
Find terminated UTF-16 string length.
Definition: str.h:946
uint count
Character repeat count.
Definition: str.h:2916
FmtFieldNum< T > FmtFieldType
This type paired with field info.
Definition: str.h:3069
Newline
Newline type.
Definition: sys.h:748
static T eps()
Get machine epsilon.
Definition: type.h:1377
int utf16_compare(const wchar16 *str1, ulong len1, const wchar16 *str2, ulong len2)
Compare two non-terminated UTF-16 strings.
Definition: str.h:729
FmtIntT< ushort > FmtUShort
Explicitly format an integer.
Definition: str.h:3115
IntegerT< T >::This IntClass
Number class type.
Definition: str.h:3067
FmtFloatT< double > FmtFloatD
Explicitly format a floating point number.
Definition: str.h:3193
FmtSetFloat(int precision=fpCURRENT, int width=-1, char ch=0, char ch_sp=0)
Constructor with all attributes.
Definition: str.h:2787
Other character type.
Definition: str.h:50
const wchar16 * utf16_min(const wchar16 *str, const wchar16 *end, bool strict=false, uint mincount=1)
Scan for UTF-16 surrogate pairs, which each require a pair of wchar16 values (4 bytes).
Definition: str.h:966
FmtFieldFloat(const FmtFloatT< T > &num, FmtAlign align=fLEFT, int width=0, char fill=' ')
Definition: str.h:3227
FmtString(const StringBase &str, FmtAlign align, int width=-1, char ch=0)
Constructor.
Definition: str.h:2979
No base prefix (default)
Definition: str.h:2333
This & width(int width, char ch=0, char ch_sp=0)
Helper for setting padding attributes.
Definition: str.h:3184
Basic integer type.
Definition: type.h:980
uint maxline
Maximum bytes per line to dump, 0 for none (all 1 line)
Definition: str.h:3297
FmtSetFloat fmt
Definition: str.h:3156
int width
Definition: str.h:3007
FmtFloatT< T > num
Definition: str.h:3224
bool null
Definition: str.h:3154
Check if type is a Plain Old Data type.
Definition: meta.h:528
FmtStringWrap & set_indent(int new_indent=0)
Definition: str.h:3017
No error.
Definition: sys.h:1115
FmtFieldNum< T > This
Definition: str.h:3202
FmtIntT< int32 > FmtInt32
Explicitly format an integer.
Definition: str.h:3112
int pad_width
Width to fill to, 0 for none, -1 if unspecified (use baseline or default) (default: 0) ...
Definition: str.h:2592
Skip invalid characters.
Definition: str.h:139
FmtFieldNum & operator=(const This &src)
Definition: str.h:3212
FmtSetNull & reset()
Reset to defaults.
Definition: str.h:2436
Error
General Evo error code stored in exceptions, or used directly when exceptions are disabled...
Definition: sys.h:1113
FmtAlign
Formatting alignment.
Definition: str.h:2356
FmtSetField(int width, char fill=0)
Short constructor with default alignment.
Definition: str.h:2498
FmtIntT(const IntClass &num, int base=fbCURRENT, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0)
Constructor for class number type (Int, etc) with all attributes.
Definition: str.h:3091
Punctuation or symbol character.
Definition: str.h:52
uint32 StrSizeT
Default Evo string size type.
Definition: sys.h:734
FmtIntT< long > FmtLong
Explicitly format an integer.
Definition: str.h:3108
const char * string_memrchr(const char *str, char ch, size_t size)
Evo implementation of memrchr() to search for character in reverse.
Definition: str.h:1150
Base 10: decimal (default)
Definition: str.h:2323
Include invalid characters – try to use as-is (dangerous)
Definition: str.h:137
FmtIntT< int > FmtInt
Explicitly format an integer.
Definition: str.h:3107
4-digit floating point precision
Definition: str.h:2348
const void * buf
Buffer to dump.
Definition: str.h:3295
static int maxdigits_prec(int exp, int precision)
Get max formatting digits with given exponent and precision, including sign and any additional chars ...
Definition: type.h:1306
Other character type.
Definition: str.h:60
Base 2: binary.
Definition: str.h:2321
ulong utf16_count(const wchar16 *str, const wchar16 *end, UtfMode mode=umREPLACE_INVALID)
Count Unicode character values in UTF-16 string.
Definition: str.h:1009
FmtIntT< short > FmtShort
Explicitly format an integer.
Definition: str.h:3106
Definition: str.h:3003
T Type
Translated type.
Definition: meta.h:373
ulong utf8_count(const char *str, const char *end, UtfMode mode=umREPLACE_INVALID)
Count Unicode character values in UTF-8 string.
Definition: str.h:481
T IntPod
Number POD type.
Definition: str.h:3068
CharType
Character type returned by ascii_type().
Definition: str.h:49
static const EndT END
Special integer value for indicating end of items or no item.
Definition: type.h:1846
ListBase< char, StrSizeT > StringBase
StringBase type.
Definition: str.h:2933
void merge(const FmtSetInt &src)
Merge source attributes (used internally).
Definition: str.h:2626
const char * str
Pointer to string for formatting null values, ignored if size is 0.
Definition: str.h:2407
Align center by adding filler on left and right sides.
Definition: str.h:2359
Default, uses system memmem() if possible (define EVO_NO_MEMMEM to prevent this), otherwise same as s...
Definition: str.h:1165
FmtSetInt & reset()
Reset attributes to defaults (not unspecified).
Definition: str.h:2613
FmtIntT< ulongl > FmtULongL
Explicitly format an integer.
Definition: str.h:3118
void merge(const FmtSetField &src)
Merge source attributes (used internally).
Definition: str.h:2514
T Type
Translated type.
Definition: meta.h:363
Explicitly format an integer.
Definition: str.h:3065
Base 8: octal.
Definition: str.h:2322
FmtString(const FmtString &str, FmtAlign align, int width, char ch=0)
Constructor with override fields for compatibility with FmtFieldType.
Definition: str.h:2988
FmtIntT< uint32 > FmtUInt32
Explicitly format an integer.
Definition: str.h:3121
FmtSetNull(const ListBase< char, StrSizeT > &null)
Constructor.
Definition: str.h:2430
FmtStringWrap(const StringBase &str, int width)
Definition: str.h:3014
FmtString(const char *str, StrSizeT size, int width=-1, char ch=0)
Constructor.
Definition: str.h:2952
FmtString(const char *str, FmtAlign align=fLEFT)
Constructor.
Definition: str.h:2943
FmtAlign align
Field alignment type (default: fLEFT)
Definition: str.h:2482
FmtSetNull(const char *null)
Constructor.
Definition: str.h:2417
char ascii_toupper(char ch)
Convert ASCII character to uppercase.
Definition: str.h:105
FmtString FmtFieldType
This type paired with field info.
Definition: str.h:2934
FmtFieldFloat(const This &src)
Definition: str.h:3230
FmtPrecision
Formatting floating point precision type.
Definition: str.h:2341
Current prefix (i.e. unspecified/default)
Definition: str.h:2332
FmtSetInt fmt
Formatting attributes.
Definition: str.h:3266
const char * utf8_scan(wchar32 &code, const char *str, const char *end, UtfMode mode=umREPLACE_INVALID)
Scan for next Unicode character in UTF-8 string.
Definition: str.h:160
void merge(const FmtSetFloat &src)
Merge from source (used internally).
Definition: str.h:2806
static const int MAXSTRLEN
Max formatted length, including either sign or hex/octal prefix (0x/0), but not both.
Definition: type.h:987
Evo C++ Library namespace.
Definition: alg.h:11
FmtIntT< uint16 > FmtUInt16
Explicitly format an integer.
Definition: str.h:3120
StringSearchAlg
String search algorithm selection.
Definition: str.h:1164
static const EndT NONE
Special integer value for indicating no item or unknown item.
Definition: type.h:1832
static T exp10(T num, int exp)
Multiply number by 10 raised to exponent.
Definition: type.h:1407
Stop or error on invalid character.
Definition: str.h:140
FmtIntT< T > This
This type
Definition: str.h:3066
char pad_ch_sp
Padding character used with special value (inf or nan), 0 to ignore (leave current fill character) (d...
Definition: str.h:2779
Holds floating point formatting attributes.
Definition: str.h:2775
FmtSetNull()
Constructor.
Definition: str.h:2411
FmtPtr(const void *ptr, int base, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0)
Constructor for formatting a pointer.
Definition: str.h:3284
Explicitly format a string.
Definition: str.h:2931
Uppercase alphabetic character (A-Z)
Definition: str.h:54
Replace invalid characters with UNICODE_REPLACEMENT_CHAR.
Definition: str.h:138
UtfMode
UTF decoding mode used to set how to handle invalid character values.
Definition: str.h:136
T num
Definition: str.h:3155
FmtSetInt fmt
Formatting attributes.
Definition: str.h:3072
int precision
Floating point precision (number of digits after decimal), fPREC_AUTO for automatic (default: fPREC_A...
Definition: str.h:2776
FmtFieldNum(const FmtIntT< T > &num, FmtAlign align=fLEFT, int width=0, char fill=' ')
Definition: str.h:3206
FmtStringWrap & set_newline(NewlineDefault nl)
Definition: str.h:3027
Check if type is a floating point type.
Definition: meta.h:277
FmtString This
This type
Definition: str.h:2932
FmtAttribs & reset()
Reset to defaults.
Definition: str.h:2899
FmtAttribs()
Constructor.
Definition: str.h:2893
static T fexp10(int &exp, T num)
Extract normalized base 10 mantissa and exponent from number.
Definition: type.h:1438
Knuth-Morris-Pratt using partial match table.
Definition: str.h:1166
FmtIntT< uint64 > FmtUInt64
Explicitly format an integer.
Definition: str.h:3122
FmtString(const char *str, StrSizeT size, FmtAlign align, int width=-1, char ch=0)
Constructor.
Definition: str.h:2962
Explicitly format a repeated character.
Definition: str.h:2914
Align left by adding filler on right side.
Definition: str.h:2358
FmtStringWrap & set_newline(Newline nl)
Definition: str.h:3022
int base
Base for formatting (default: fDEC)
Definition: str.h:2590
Default newline type, implicitly converts to NL_SYS (system default newline).
Definition: sys.h:773
Explicitly format a pointer.
Definition: str.h:3264
char pad_ch
Padding character, 0 to ignore (leave current fill character) (default: '0')
Definition: str.h:2778
FmtWidth
Formatting field width.
Definition: str.h:2367
FmtFloatT(T num, int precision=fpCURRENT, int width=-1, char ch='0', char ch_sp=' ')
Constructor for POD number type with all attributes.
Definition: str.h:3165
StrSizeT size
String (str) size for formatting null values, 0 for none/empty
Definition: str.h:2408
FmtIntT< longl > FmtLongL
Explicitly format an integer.
Definition: str.h:3109
const wchar16 * utf16_scan_term(wchar32 &code, const wchar16 *str, UtfMode mode=umREPLACE_INVALID)
Scan for next Unicode character in terminated UTF-16 string.
Definition: str.h:682
FmtSetField field
Field alignment attributes.
Definition: str.h:2890
FmtIntT< T > num
Definition: str.h:3203
Base 16: hexadecimal (lowercase)
Definition: str.h:2325
int indent
Definition: str.h:3008
Whitespace character.
Definition: str.h:51
FmtChar(char ch, ulong count)
Constructor.
Definition: str.h:2922
static const wchar16 UNICODE_REPLACEMENT_CHAR
Unicode code point for "Replacement Character" used when decoding invalid UTF bytes or values...
Definition: str.h:133
FmtBasePrefix prefix
Formatting prefix type (default: fPREFIX0)
Definition: str.h:2591
Nullable basic floating-point base type.
Definition: type.h:1291
StringBase str
Definition: str.h:2936
3-digit floating point precision
Definition: str.h:2347
T FloatPod
Number POD type.
Definition: str.h:3151
static void setup_align(int &align_padleft, int &align_padright, int align_padding, const FmtSetField *field)
Used to setup and calculate alignment padding (used internally).
Definition: str.h:2529
FmtBasePrefix
Formatting integer base prefix type.
Definition: str.h:2331
int width
Field width to align in (default: 0)
Definition: str.h:2483
FmtPtr(const void *ptr, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0)
Constructor for formatting a pointer.
Definition: str.h:3274
2-digit floating point precision
Definition: str.h:2346
Lowercase alphabetic character (a-z)
Definition: str.h:55
const void * ptr
Pointer to format.
Definition: str.h:3265
FmtStringWrap(const char *str, StrSizeT size, int width, int indent=0)
Definition: str.h:3011
Punctuation used before a word begins, break words before this ( ([{< )
Definition: str.h:63
FmtIntT(T num, int base=fbCURRENT, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0)
Constructor for POD number type with all attributes.
Definition: str.h:3081
Double character base prefix (0x for hex, 0o for octal, 0b for binary)
Definition: str.h:2335
FmtSetField(FmtAlign align=faCURRENT, int width=-1, char fill=0)
Main constructor.
Definition: str.h:2491
Align right by adding filler on left side.
Definition: str.h:2360
FmtSetField field
Definition: str.h:3225
FmtFloatT< T > This
This type.
Definition: str.h:3149
FmtIntT< ulong > FmtULong
Explicitly format an integer.
Definition: str.h:3117
Current floating point precision (i.e. unspecified/default)
Definition: str.h:2342
FmtFieldNum(const This &src)
Definition: str.h:3209
This pairs a FmtFloatT type with FmtSetField for output formatting.
Definition: str.h:3035
Quote character, break word before or after this depending on whether begin or end quote ('"`) ...
Definition: str.h:62
FmtSetInt num_int
Integer attributes.
Definition: str.h:2888
Check if type is an integer (whole number) type.
Definition: meta.h:248
int utf16_compare8(const wchar16 *str1, ulong len1, const char *str2, ulong len2)
Compare a non-terminated UTF-16 string to a non-terminated UTF-8 string.
Definition: str.h:842
char ch
Character to format.
Definition: str.h:2915
FmtSetField fmt
Definition: str.h:2937
FmtFloatT< ldouble > FmtFloatL
Explicitly format a floating point number.
Definition: str.h:3194
FmtSetInt(int base=fbCURRENT, FmtBasePrefix prefix=fbpCURRENT, int width=-1, char ch=0)
Main constructor with all attributes.
Definition: str.h:2601
Check if type is a boolean (true/false) type.
Definition: meta.h:229
const char * utf8_scan_term(wchar32 &code, const char *str, UtfMode mode=umREPLACE_INVALID)
Scan for next Unicode character in terminated UTF-8 string.
Definition: str.h:234
FmtFieldFloat< T > FmtFieldType
This type paired with field info.
Definition: str.h:3152
Explicitly format a floating point number.
Definition: str.h:3148
6-digit floating point precision
Definition: str.h:2350
Holds field and alignment formatting attributes.
Definition: str.h:2481
#define STATIC_ASSERT(EXP, TOKEN)
Assert compile-time expression is true or trigger compiler error.
Definition: meta.h:54
T num
Number to format.
Definition: str.h:3071
FmtSetNull(const char *null, StrSizeT size)
Constructor.
Definition: str.h:2424
NewlineValue newline
Definition: str.h:3009
static T precision()
Get best precision value.
Definition: type.h:1314
FmtSetNull null
String to use for null values from strings (like String) or primitives (like Int) ...
Definition: str.h:2887
const char * utf8_min(const char *str, const char *end, bool strict=false, ulong mincount=1, uint minsize=2)
Scan for UTF-8 multi-byte characters of at least minsize.
Definition: str.h:418
FmtSetInt(int base, int width, char ch=0)
Short constructor without prefix.
Definition: str.h:2609
Explicitly format a hex dump from buffer.
Definition: str.h:3294
Current base (i.e. unspecified/default)
Definition: str.h:2319
FmtBase
Formatting integer base.
Definition: str.h:2318
Current alignment (i.e. unspecified/default)
Definition: str.h:2357
FmtSetFloat num_flt
Floating point attributes.
Definition: str.h:2889
FmtSetFloat & reset()
Reset to defaults.
Definition: str.h:2793
ulong utf16_to8(const wchar16 *&str, const wchar16 *end, char *outbuf=NULL, ulong outsize=0, UtfMode mode=umREPLACE_INVALID)
Convert UTF-16 string to UTF-8 string.
Definition: str.h:1053
FmtIntT< int64 > FmtInt64
Explicitly format an integer.
Definition: str.h:3113
Auto base detection.
Definition: str.h:2320
const wchar16 * utf16_scan(wchar32 &code, const wchar16 *str, const wchar16 *end, UtfMode mode=umREPLACE_INVALID)
Scan for next Unicode character in UTF-16 string.
Definition: str.h:630