Evo C++ Library v0.5.1
strtok.h
Go to the documentation of this file.
1 // Evo C++ Library
2 /* Copyright 2019 Justin Crowell
3 Distributed under the BSD 2-Clause License -- see included file LICENSE.txt for details.
4 */
6 
7 #pragma once
8 #ifndef INCL_evo_strtok_h
9 #define INCL_evo_strtok_h
10 
11 #include "substring.h"
12 
13 namespace evo {
18 
20 
22 class StrTokBase {
23 public:
24  typedef StrTokBase BaseType;
26 
30  Size index() const
31  { return index_; }
32 
36  Char delim() const
37  { return delim_; }
38 
42  const SubString& value() const
43  { return value_; }
44 
45 protected:
47  Size index_;
50 
53  { index_ = END; }
54 
56  StrTokBase(const StringBase& string) :
57  string_( string ),
58  index_( END )
59  { }
60 
64  StrTokBase(const BaseType& src) :
65  string_ ( src.string_ ),
66  index_( src.index_ ),
67  delim_( src.delim_ ),
68  value_( src.value_ )
69  { }
70 
72  void copy(const BaseType& src) {
73  this->string_ = src.string_;
74  this->index_ = src.index_;
75  this->delim_ = src.delim_;
76  this->value_ = src.value_;
77  }
78 };
79 
81 
112 class StrTok : public StrTokBase {
113 public:
114  typedef StrTok ThisType;
117 
120  { }
121 
125  StrTok(const ThisType& src) : StrTokBase((const BaseType&)src)
126  { }
127 
131  StrTok(const BaseType& src) : StrTokBase(src)
132  { }
133 
139  StrTok(const StringBase& str) : StrTokBase(str)
140  { impl_reset(); }
141 
146  ThisType& operator=(const ThisType& src)
147  { this->copy(src); return *this; }
148 
153  ThisType& operator=(const BaseType& src)
154  { this->copy(src); return *this; }
155 
160  ThisType& operator=(const StringBase& str) {
161  this->string_ = str;
162  this->value_.set();
163  this->delim_.set();
164  impl_reset();
165  return *this;
166  }
167 
171  ThisType& reset() {
172  this->delim_.set();
173  this->value_.set();
174  impl_reset();
175  return *this;
176  }
177 
184  bool next(char delim) {
185  Size& ind = this->index_;
186  Size size = this->string_.size_;
187  if (ind > size) {
188  this->value_.set();
189  this->delim_.set();
190  return false;
191  }
192 
193  // Skip whitespace
194  const char* data = this->string_.data_;
195  for (char ch; ind < size && ((ch=data[ind]) == ' ' || ch == '\t'); )
196  ++ind;
197  if (ind == size) {
198  this->value_.setempty();
199  this->delim_.set();
200  ind = END;
201  return true;
202  }
203 
204  // Extract token
205  data += ind;
206  size -= ind;
207  const char* p = (char*)::memchr(data, delim, size);
208  if (p == NULL) {
209  this->value_.set(data, size).stripr();
210  this->delim_.set();
211  ind = END;
212  } else {
213  size = (Size)(p - data);
214  this->value_.set(data, size).stripr();
215  this->delim_ = delim;
216  ind = ind + size + 1;
217  }
218  return true;
219  }
220 
231  bool nextq(char delim) {
232  Size& ind = this->index_;
233  Size size = this->string_.size_;
234  if (ind > size) {
235  this->value_.set();
236  this->delim_.set();
237  return false;
238  }
239 
240  // Skip whitespace
241  const char* data = this->string_.data_;
242  for (char ch; ind < size && ((ch=data[ind]) == ' ' || ch == '\t'); )
243  ++ind;
244  if (ind == size) {
245  this->value_.setempty();
246  this->delim_.set();
247  ind = END;
248  return true;
249  }
250 
251  // Extract token
252  const char* start = data + ind;
253  const char* end = data + size;
254  const char* startq;
255  const char* endq;
256  const char* p = str_scan_endq(startq, endq, start, end, delim);
257  size = (Size)(endq - startq);
258  if (p == end) {
259  this->value_.set(startq, size);
260  this->delim_.set();
261  ind = END;
262  } else {
263  this->value_.set(startq, size);
264  this->delim_ = *p;
265  ind = (Size)(p + 1 - data);
266  }
267  if (startq == start)
268  this->value_.strip();
269  return true;
270  }
271 
278  bool nextw(char delim) {
279  Size& ind = this->index_;
280  Size size = this->string_.size_;
281  if (ind > size) {
282  this->value_.set();
283  this->delim_.set();
284  return false;
285  }
286 
287  // Skip whitespace and dup delims
288  const char* data = this->string_.data_;
289  for (char ch; ind < size && ((ch=data[ind]) == ' ' || ch == '\t' || ch == delim); )
290  ++ind;
291  if (ind == size) {
292  this->value_.set();
293  this->delim_.set();
294  ind = END;
295  return false;
296  }
297 
298  // Extract token
299  data += ind;
300  size -= ind;
301  const char* p = (char*)::memchr(data, delim, size);
302  if (p == NULL) {
303  this->value_.set(data, size).stripr();
304  this->delim_.set();
305  ind = END;
306  } else {
307  size = (Size)(p - data);
308  this->value_.set(data, size).stripr();
309  this->delim_ = delim;
310  ind = ind + size + 1;
311  }
312  return true;
313  }
314 
321  bool nextany(const StringBase& delims) {
322  Size& ind = this->index_;
323  Size size = this->string_.size_;
324  if (ind > size) {
325  this->value_.set();
326  this->delim_.set();
327  return false;
328  }
329 
330  // Skip whitespace
331  const char* data = this->string_.data_;
332  for (char ch; ind < size && ((ch=data[ind]) == ' ' || ch == '\t'); )
333  ++ind;
334  if (ind == size) {
335  this->value_.setempty();
336  this->delim_.set();
337  ind = END;
338  return true;
339  }
340 
341  // Extract token
342  size -= ind;
343  const char* start = data + ind;
344  const char* end = start + size;
345  const char* p = str_scan_delim(start, end, delims.data_, delims.size_);
346  size = (Size)(p - start);
347  this->value_.set(start, size).strip();
348  if (p == end) {
349  this->delim_.set();
350  ind = END;
351  } else {
352  this->delim_ = *p;
353  ind = (Size)(p + 1 - data);
354  }
355  return true;
356  }
357 
369  bool nextanyq(const StringBase& delims, char ws_delim) {
370  Size& ind = this->index_;
371  Size size = this->string_.size_;
372  if (ind > size) {
373  this->value_.set();
374  this->delim_.set();
375  return false;
376  }
377 
378  // Skip whitespace
379  const char* data = this->string_.data_;
380  for (char ch; ind < size && ((ch=data[ind]) == ' ' || ch == '\t'); )
381  ++ind;
382  if (ind == size) {
383  this->value_.setempty();
384  this->delim_.set();
385  ind = END;
386  return true;
387  }
388 
389  // Detect whitespace delim
390  char ws_delim_char;
391  if (ws_delim == 1) {
392  const StrSizeT ws_i = SubString(delims).findany(" \t\r\n", 4);
393  ws_delim_char = (ws_i == NONE ? 0 : delims.data_[ws_i]);
394  } else
395  ws_delim_char = ws_delim;
396 
397  // Extract token
398  const char* start = data + ind;
399  const char* end = data + size;
400  const char* startq;
401  const char* endq;
402  const char* p = str_scan_endq(startq, endq, start, end, delims.data_, delims.size_, ws_delim_char);
403  size = (Size)(endq - startq);
404  if (p == end) {
405  this->value_.set(startq, size);
406  this->delim_.set();
407  ind = END;
408  } else {
409  this->value_.set(startq, size);
410  this->delim_ = *p;
411  ind = (Size)(p + 1 - data);
412  }
413  if (startq == start)
414  this->value_.strip();
415  return true;
416  }
417 
428  bool nextanyq(const StringBase& delims) {
429  return nextanyq(delims, 1); // 1 to detect a whitespace delim
430  }
431 
438  Size skipws() {
439  const char* data = this->string_.data_;
440  const Size size = this->string_.size_;
441  Size& ind = this->index_;
442  for (char ch; ind < size && ((ch=data[ind]) == ' ' || ch == '\t'); )
443  ++ind;
444  return ind;
445  }
446 
458  template<class C,class T>
459  static typename C::Size split(C& items, const T& str, char delim=',') {
460  typename C::Size count = 0;
461  ThisType tok(str);
462  for (; tok.next(delim); ++count)
463  items.add(tok.value().convert<typename C::Item>());
464  return count;
465  }
466 
476  template<class T>
477  static SubString splitat(const T& str, Size index, char delim=',') {
478  SubString result;
479  ThisType tok(str);
480  for (Size i=0; tok.next(delim); ++i)
481  if (i == index)
482  { result = tok.value(); break; }
483  return result;
484  }
485 
486 private:
487  void impl_reset() {
488  const char* str_data_ = this->string_.data_;
489  Size str_size_ = this->string_.size_;
490  Size& ind_ = this->index_;
491  char ch;
492  ind_ = 0;
493  while ( ind_ < str_size_ && ((ch=str_data_[ind_]) == ' ' || ch == '\t') )
494  ++ind_;
495  if (ind_ >= str_size_)
496  ind_ = END;
497  }
498 };
499 
501 
532 class StrTokR : public StrTokBase {
533 public:
534  typedef StrTokR ThisType;
537 
540  { }
541 
545  StrTokR(const ThisType& src) : StrTokBase(src)
546  { }
547 
551  StrTokR(const BaseType& src) : StrTokBase(src)
552  { }
553 
557  StrTokR(const StringBase& str) : StrTokBase(str)
558  { impl_reset(); }
559 
564  ThisType& operator=(const ThisType& src)
565  { this->copy(src); return *this; }
566 
571  ThisType& operator=(const BaseType& src)
572  { this->copy(src); return *this; }
573 
578  ThisType& operator=(const StringBase& str) {
579  this->string_ = str;
580  this->value_.set();
581  this->delim_.set();
582  impl_reset();
583  return *this;
584  }
585 
589  ThisType& reset() {
590  this->value_.set();
591  this->delim_.set();
592  impl_reset();
593  return *this;
594  }
595 
600  bool next(char delim) {
601  Size& ind = this->index_;
602  Size size = this->string_.size_;
603  if (ind > size) {
604  this->value_.set();
605  this->delim_.set();
606  return false;
607  }
608 
609  // Skip whitespace
610  const char* data = this->string_.data_;
611  for (char ch; ind > 0 && ((ch=data[ind - 1]) == ' ' || ch == '\t'); )
612  --ind;
613  if (ind == 0) {
614  this->value_.setempty();
615  this->delim_.set();
616  ind = END;
617  return true;
618  }
619 
620  // Extract token
621  const char* p = string_memrchr(data, delim, ind);
622  if (p == NULL) {
623  this->value_.set(data, ind).stripl();
624  this->delim_.set();
625  ind = END;
626  } else {
627  size = (Size)(data + ind - (++p));
628  this->value_.set(p, size).stripl();
629  this->delim_ = delim;
630  ind = ind - size - 1;
631  }
632  return true;
633  }
634 
644  bool nextq(char delim) {
645  Size& ind = this->index_;
646  Size size = this->string_.size_;
647  if (ind > size) {
648  this->value_.set();
649  this->delim_.set();
650  return false;
651  }
652 
653  // Skip whitespace
654  const char* data = this->string_.data_;
655  for (char ch; ind > 0 && ((ch=data[ind - 1]) == ' ' || ch == '\t'); )
656  --ind;
657  if (ind == 0) {
658  this->value_.setempty();
659  this->delim_.set();
660  ind = END;
661  return true;
662  }
663 
664  // Extract token
665  const char* end = data + ind;
666  const char* startq;
667  const char* endq;
668  const char* p = str_scan_endq_r(startq, endq, data, end, delim);
669  size = (Size)(endq - startq);
670  p = str_scan_nws_r(data, p, delim);
671  if (p == data) {
672  this->delim_.set();
673  ind = END;
674  } else {
675  --p;
676  this->delim_ = *p;
677  ind = (Size)(p - data);
678  }
679  this->value_.set(startq, size);
680  if (endq == end)
681  this->value_.strip();
682  return true;
683  }
684 
691  bool nextw(char delim) {
692  Size& ind = this->index_;
693  Size size = this->string_.size_;
694  if (ind > size) {
695  this->value_.set();
696  this->delim_.set();
697  return false;
698  }
699 
700  // Skip whitespace
701  const char* data = this->string_.data_;
702  for (char ch; ind > 0 && ((ch=data[ind - 1]) == ' ' || ch == '\t' || ch == delim); )
703  --ind;
704  if (ind == 0) {
705  this->value_.set();
706  this->delim_.set();
707  ind = END;
708  return false;
709  }
710 
711  // Extract token
712  const char* p = string_memrchr(data, delim, ind);
713  if (p == NULL) {
714  this->value_.set(data, ind).stripl();
715  this->delim_.set();
716  ind = END;
717  } else {
718  size = (Size)(data + ind - (++p));
719  this->value_.set(p, size).stripl();
720  this->delim_ = delim;
721  ind = ind - size - 1;
722  }
723  return true;
724  }
725 
732  bool nextany(const StringBase& delims) {
733  Size& ind = this->index_;
734  Size size = this->string_.size_;
735  if (ind > size) {
736  this->value_.set();
737  this->delim_.set();
738  return false;
739  }
740 
741  // Skip whitespace
742  const char* data = this->string_.data_;
743  for (char ch; ind > 0 && ((ch=data[ind - 1]) == ' ' || ch == '\t'); )
744  --ind;
745  if (ind == 0) {
746  this->value_.setempty();
747  this->delim_.set();
748  ind = END;
749  return true;
750  }
751 
752  // Extract token
753  const char* end = data + ind;
754  const char* p = str_scan_delim_r(data, end, delims.data_, delims.size_);
755  size = (Size)(end - p);
756  this->value_.set(p, size).stripl();
757  if (p == data) {
758  this->delim_.set();
759  ind = END;
760  } else {
761  --p;
762  this->delim_ = *p;
763  ind = (Size)(p - data);
764  }
765  return true;
766  }
767 
779  bool nextanyq(const StringBase& delims, char ws_delim) {
780  Size& ind = this->index_;
781  Size size = this->string_.size_;
782  if (ind > size) {
783  this->value_.set();
784  this->delim_.set();
785  return false;
786  }
787 
788  // Skip whitespace
789  const char* data = this->string_.data_;
790  for (char ch; ind > 0 && ((ch=data[ind - 1]) == ' ' || ch == '\t'); )
791  --ind;
792  if (ind == 0) {
793  this->value_.setempty();
794  this->delim_.set();
795  ind = END;
796  return true;
797  }
798 
799  // Detect whitespace delim
800  char ws_delim_char;
801  if (ws_delim == 1) {
802  const StrSizeT ws_i = SubString(delims).findany(" \t\r\n", 4);
803  ws_delim_char = (ws_i == NONE ? 0 : delims.data_[ws_i]);
804  } else
805  ws_delim_char = ws_delim;
806 
807  // Extract token
808  const char* end = data + ind;
809  const char* startq;
810  const char* endq;
811  const char* p = str_scan_endq_r(startq, endq, data, end, delims.data_, delims.size_, ws_delim_char);
812  size = (Size)(endq - startq);
813  p = str_scan_nws_r(data, p, ws_delim_char);
814  if (p == data) {
815  this->delim_.set();
816  ind = END;
817  } else {
818  --p;
819  this->delim_ = *p;
820  ind = (Size)(p - data);
821  }
822  this->value_.set(startq, size);
823  if (endq == end)
824  this->value_.strip();
825  return true;
826  }
827 
838  bool nextanyq(const StringBase& delims) {
839  return nextanyq(delims, 1); // 1 to detect a whitespace delim
840  }
841 
853  template<class C,class T>
854  static typename C::Size split(C& items, const T& str, char delim=',') {
855  typename C::Size count = 0;
856  ThisType tok(str);
857  for (; tok.next(delim); ++count)
858  items.add(tok.value().convert<typename C::Item>());
859  return count;
860  }
861 
871  template<class T>
872  static SubString splitat(const T& str, Size index, char delim=',') {
873  SubString result;
874  ThisType tok(str);
875  for (Size i=0; tok.next(delim); ++i)
876  if (i == index)
877  { result = tok.value(); break; }
878  return result;
879  }
880 
881 private:
882  void impl_reset() {
883  const char* str_data_ = this->string_.data_;
884  Size& ind_ = this->index_;
885  char ch;
886  ind_ = this->string_.size_;
887  while ( ind_ > 0 && ((ch=str_data_[ind_-1]) == ' ' || ch == '\t') )
888  --ind_;
889  if (ind_ == 0)
890  ind_ = END;
891  }
892 };
893 
895 
926 class StrTokS : public StrTokBase {
927 public:
928  typedef StrTokS ThisType;
931 
934  { }
935 
939  StrTokS(const ThisType& src) : StrTokBase(src)
940  { }
941 
945  StrTokS(const BaseType& src) : StrTokBase(src)
946  { }
947 
951  StrTokS(const StringBase& str) : StrTokBase(str)
952  { this->index_ = (this->string_.size_ > 0 ? 0 : (Size)END); }
953 
958  ThisType& operator=(const ThisType& src)
959  { this->copy(src); return *this; }
960 
965  ThisType& operator=(const BaseType& src)
966  { this->copy(src); return *this; }
967 
972  ThisType& operator=(const StringBase& str) {
973  this->string_ = str;
974  this->value_.set();
975  this->delim_.set();
976  this->index_ = (this->string_.size_ > 0 ? 0 : (Size)END);
977  return *this;
978  }
979 
983  ThisType& reset() {
984  this->value_.set();
985  this->delim_.set();
986  this->index_ = (this->string_.size_ > 0 ? 0 : (Size)END);
987  return *this;
988  }
989 
994  bool next(char delim) {
995  Size& ind = this->index_;
996  Size size = this->string_.size_;
997  if (ind > size) {
998  this->value_.set();
999  this->delim_.set();
1000  return false;
1001  } else if (ind == size) {
1002  this->value_.setempty();
1003  this->delim_.set();
1004  ind = END;
1005  return true;
1006  }
1007 
1008  // Extract token
1009  size -= ind;
1010  const char* data = this->string_.data_ + ind;
1011  const char* p = (char*)::memchr(data, delim, size);
1012  if (p == NULL) {
1013  this->value_.set(data, size);
1014  this->delim_.set();
1015  ind = END;
1016  } else {
1017  size = (Size)(p - data);
1018  this->value_.set(data, size);
1019  this->delim_ = delim;
1020  ind = ind + size + 1;
1021  }
1022  return true;
1023  }
1024 
1031  bool nextw(char delim) {
1032  Size& ind = this->index_;
1033  Size size = this->string_.size_;
1034  if (ind > size) {
1035  this->value_.set();
1036  this->delim_.set();
1037  return false;
1038  }
1039 
1040  // Skip dup delims
1041  const char* data = this->string_.data_;
1042  while (ind < size && data[ind] == delim)
1043  ++ind;
1044  if (ind == size) {
1045  this->value_.set();
1046  this->delim_.set();
1047  ind = END;
1048  return false;
1049  }
1050 
1051  // Extract token
1052  data += ind;
1053  size -= ind;
1054  const char* p = (char*)::memchr(data, delim, size);
1055  if (p == NULL) {
1056  this->value_.set(data, size);
1057  this->delim_.set();
1058  ind = END;
1059  } else {
1060  size = (Size)(p - data);
1061  this->value_.set(data, size);
1062  this->delim_ = delim;
1063  ind = ind + size + 1;
1064  }
1065  return true;
1066  }
1067 
1072  bool nextany(const StringBase& delims) {
1073  Size& ind = this->index_;
1074  Size size = this->string_.size_;
1075  if (ind > size) {
1076  this->value_.set();
1077  this->delim_.set();
1078  return false;
1079  } else if (ind == size) {
1080  this->value_.setempty();
1081  this->delim_.set();
1082  ind = END;
1083  return true;
1084  }
1085 
1086  // Extract token
1087  size -= ind;
1088  const char* data = this->string_.data_;
1089  const char* start = data + ind;
1090  const char* end = start + size;
1091  const char* p = str_scan_delim(start, end, delims.data_, delims.size_);
1092  size = (Size)(p - start);
1093  this->value_.set(start, size);
1094  if (p == end) {
1095  this->delim_.set();
1096  ind = END;
1097  } else {
1098  this->delim_ = *p;
1099  ind = (Size)(p + 1 - data);
1100  }
1101  return true;
1102  }
1103 
1115  template<class C,class T>
1116  static typename C::Size split(C& items, const T& str, char delim=',') {
1117  typename C::Size count = 0;
1118  ThisType tok(str);
1119  for (; tok.next(delim); ++count)
1120  items.add(tok.value().convert<typename C::Item>());
1121  return count;
1122  }
1123 
1133  template<class T>
1134  static SubString splitat(const T& str, Size index, char delim=',') {
1135  SubString result;
1136  ThisType tok(str);
1137  for (Size i=0; tok.next(delim); ++i)
1138  if (i == index)
1139  { result = tok.value(); break; }
1140  return result;
1141  }
1142 };
1143 
1145 
1176 class StrTokRS : public StrTokBase {
1177 public:
1178  typedef StrTokRS ThisType;
1181 
1184  { }
1185 
1189  StrTokRS(const ThisType& src) : StrTokBase(src)
1190  { }
1191 
1195  StrTokRS(const BaseType& src) : StrTokBase(src)
1196  { }
1197 
1201  StrTokRS(const StringBase& str) : StrTokBase(str)
1202  { this->index_ = (str.size_ > 0 ? str.size_ : END); }
1203 
1208  ThisType& operator=(const ThisType& src)
1209  { this->copy(src); return *this; }
1210 
1215  ThisType& operator=(const BaseType& src)
1216  { this->copy(src); return *this; }
1217 
1222  ThisType& operator=(const StringBase& str) {
1223  this->string_ = str;
1224  this->index_ = (str.size_ > 0 ? str.size_ : END);
1225  this->value_.set();
1226  this->delim_.set();
1227  return *this;
1228  }
1229 
1233  ThisType& reset() {
1234  this->index_ = (this->string_.size_ > 0 ? this->string_.size_ : END);
1235  this->value_.set();
1236  this->delim_.set();
1237  return *this;
1238  }
1239 
1244  bool next(char delim) {
1245  Size& ind = this->index_;
1246  Size size = this->string_.size_;
1247  if (ind > size) {
1248  this->value_.set();
1249  this->delim_.set();
1250  return false;
1251  } else if (ind == 0) {
1252  this->value_.setempty();
1253  this->delim_.set();
1254  ind = END;
1255  return true;
1256  }
1257 
1258  // Extract token
1259  const char* data = this->string_.data_;
1260  const char* p = string_memrchr(data, delim, ind);
1261  if (p == NULL) {
1262  this->value_.set(data, ind);
1263  this->delim_.set();
1264  ind = END;
1265  } else {
1266  size = (Size)(data + ind - (++p));
1267  this->value_.set(p, size);
1268  this->delim_ = delim;
1269  ind = ind - size - 1;
1270  }
1271  return true;
1272  }
1273 
1280  bool nextw(char delim) {
1281  Size& ind = this->index_;
1282  Size size = this->string_.size_;
1283  if (ind > size) {
1284  this->value_.set();
1285  this->delim_.set();
1286  return false;
1287  }
1288 
1289  // Skip dup delims
1290  const char* data = this->string_.data_;
1291  while (ind > 0 && data[ind - 1] == delim)
1292  --ind;
1293  if (ind == 0) {
1294  this->value_.set();
1295  this->delim_.set();
1296  ind = END;
1297  return false;
1298  }
1299 
1300  // Extract token
1301  const char* p = string_memrchr(data, delim, ind);
1302  if (p == NULL) {
1303  this->value_.set(data, ind);
1304  this->delim_.set();
1305  ind = END;
1306  } else {
1307  size = (Size)(data + ind - (++p));
1308  this->value_.set(p, size);
1309  this->delim_ = delim;
1310  ind = ind - size - 1;
1311  }
1312  return true;
1313  }
1314 
1319  bool nextany(const StringBase& delims) {
1320  Size& ind = this->index_;
1321  Size size = this->string_.size_;
1322  if (ind > size) {
1323  this->value_.set();
1324  this->delim_.set();
1325  return false;
1326  } else if (ind == 0) {
1327  this->value_.setempty();
1328  this->delim_.set();
1329  ind = END;
1330  return true;
1331  }
1332 
1333  // Extract token
1334  const char* data = this->string_.data_;
1335  const char* end = data + ind;
1336  const char* p = str_scan_delim_r(data, end, delims.data_, delims.size_);
1337  size = (Size)(end - p);
1338  this->value_.set(p, size);
1339  if (p == data) {
1340  this->delim_.set();
1341  ind = END;
1342  } else {
1343  --p;
1344  this->delim_ = *p;
1345  ind = (Size)(p - data);
1346  }
1347  return true;
1348  }
1349 
1361  template<class C,class T>
1362  static typename C::Size split(C& items, const T& str, char delim=',') {
1363  typename C::Size count = 0;
1364  ThisType tok(str);
1365  for (; tok.next(delim); ++count)
1366  items.add(tok.value().convert<typename C::Item>());
1367  return count;
1368  }
1369 
1379  template<class T>
1380  static SubString splitat(const T& str, Size index, char delim=',') {
1381  SubString result;
1382  ThisType tok(str);
1383  for (Size i=0; tok.next(delim); ++i)
1384  if (i == index)
1385  { result = tok.value(); break; }
1386  return result;
1387  }
1388 };
1389 
1391 
1400 template<class T, bool (T::*NextCh)(char), bool (T::*NextAny)(const StringBase&)=&T::nextany>
1401 struct StrTokVariant : public T {
1405 
1407  { }
1408  StrTokVariant(const ThisType& src) : T((const T&)src)
1409  { }
1410  StrTokVariant(const BaseType& src) : T(src)
1411  { }
1412  StrTokVariant(const StringBase& str) : T(str)
1413  { }
1414 
1415  ThisType& operator=(const ThisType& src)
1416  { this->copy(src); return *this; }
1417  ThisType& operator=(const BaseType& src)
1418  { this->copy(src); return *this; }
1419  ThisType& operator=(const StringBase& str)
1420  { T::operator=(str); return *this; }
1421 
1422  bool next(char delim)
1423  { return ( ((T*)this)->*NextCh )(delim); }
1424  bool nextany(const StringBase& delims)
1425  { return ( ((T*)this)->*NextAny )(delims); }
1426 
1427  template<class C,class S>
1428  static typename C::Size split(C& items, const S& str, char delim=',') {
1429  typename C::Size count = 0;
1430  T tok(str);
1431  for (; (tok.*NextCh)(delim); ++count)
1432  items.add( tok.value().template convert<typename C::Item>() );
1433  return count;
1434  }
1435 
1436  template<class S>
1437  static SubString splitat(const S& str, Size index, char delim=',') {
1438  SubString result;
1439  T tok(str);
1440  for (Size i=0; (tok.*NextCh)(delim); ++i)
1441  if (i == index)
1442  { result = tok.value(); break; }
1443  return result;
1444  }
1445 };
1446 
1448 
1475 
1502 
1504 
1532 
1560 
1588 
1616 
1618 
1651 class StrTokLine : public StrTokBase {
1652 public:
1656 
1659  { }
1660 
1662  StrTokLine(const ThisType& src) : StrTokBase((const BaseType&)src)
1663  { }
1664 
1666  StrTokLine(const BaseType& src) : StrTokBase(src)
1667  { }
1668 
1670  StrTokLine(const StringBase& str) : StrTokBase(str)
1671  { impl_reset(); }
1672 
1674  ThisType& operator=(const ThisType& src)
1675  { this->copy(src); return *this; }
1676 
1678  ThisType& operator=(const BaseType& src)
1679  { this->copy(src); return *this; }
1680 
1682  ThisType& operator=(const StringBase& str) {
1683  this->string_ = str;
1684  this->value_.set();
1685  this->delim_.set();
1686  impl_reset();
1687  return *this;
1688  }
1689 
1691  ThisType& reset() {
1692  this->delim_.set();
1693  this->value_.set();
1694  impl_reset();
1695  return *this;
1696  }
1697 
1705  bool next() {
1706  Size& ind = this->index_;
1707  Size size = this->string_.size_;
1708  if (ind > size) {
1709  this->value_.set();
1710  this->delim_.set();
1711  return false;
1712  } else if (ind == size) {
1713  this->value_.setempty();
1714  this->delim_.set();
1715  ind = END;
1716  return true;
1717  }
1718 
1719  // Extract token
1720  size -= ind;
1721  const char* data = this->string_.data_;
1722  const char* start = data + ind;
1723  const char* end = start + size;
1724  const char* p = str_scan_delim(start, end, '\r', '\n');
1725  size = (Size)(p - start);
1726  this->value_.set(start, size);
1727  this->delim_.set();
1728  if (p == end) {
1729  ind = END;
1730  } else {
1731  ind = (Size)(p + 1 - data);
1732  if (p + 1 < end && ((*p == '\r' && p[1] == '\n') || (*p == '\n' && p[1] == '\r')))
1733  ++ind;
1734  }
1735  return true;
1736  }
1737 
1748  template<class C,class T>
1749  static typename C::Size split(C& items, const T& str) {
1750  typename C::Size count = 0;
1751  ThisType tok(str);
1752  for (; tok.next(); ++count)
1753  items.add(tok.value().convert<typename C::Item>());
1754  return count;
1755  }
1756 
1765  template<class T>
1766  static SubString splitat(const T& str, Size index) {
1767  SubString result;
1768  ThisType tok(str);
1769  for (Size i=0; tok.next(); ++i)
1770  if (i == index) {
1771  result = tok.value();
1772  break;
1773  }
1774  return result;
1775  }
1776 
1777 private:
1778  void impl_reset() {
1779  Size str_size_ = this->string_.size_;
1780  Size& ind_ = this->index_;
1781  ind_ = 0;
1782  if (ind_ >= str_size_)
1783  ind_ = END;
1784  }
1785 };
1786 
1788 
/** Helper for tokenizing in a loop: calls TOK.next(DELIM) and executes `break` if it returns false.
 - TOK is parenthesized so any expression yielding a tokenizer works, e.g. `*ptr`
 - This expands to an unbraced `if` statement, so use it as its own statement (followed by `;`) inside a loop
 \param  TOK    Tokenizer object (or expression yielding one) to call next() on
 \param  DELIM  Delimiter to pass to next()
*/
#define EVO_TOK_NEXT_OR_BREAK(TOK, DELIM) if (!(TOK).next(DELIM)) break
1855 
/** Helper for tokenizing in a loop: evaluates EXPR and executes `break` if it is false.
 - EXPR is parenthesized so the whole expression is negated, e.g. `a || b` breaks only when both are false
 - This expands to an unbraced `if` statement, so use it as its own statement (followed by `;`) inside a loop
 \param  EXPR  Expression to evaluate, usually a call to a tokenizer next*() method
*/
#define EVO_TOK_OR_BREAK(EXPR) if (!(EXPR)) break
1896 
1898 
1899 }
1900 #endif
StrTokLine(const BaseType &src)
Copy constructor.
Definition: strtok.h:1666
C convert() const
Convert string to value of given type.
Definition: substring.h:1427
StrTokR()
Default constructor creates empty tokenizer.
Definition: strtok.h:539
StrTokVariant()
Definition: strtok.h:1406
StrTokS ThisType
This type.
Definition: strtok.h:928
StrTok(const ThisType &src)
Copy constructor.
Definition: strtok.h:125
ThisType & operator=(const ThisType &src)
Assignment/Copy operator.
Definition: strtok.h:564
static C::Size split(C &items, const T &str)
Split string lines into list using next().
Definition: strtok.h:1749
StrTokLine(const StringBase &str)
Constructor to start tokenizing given string.
Definition: strtok.h:1670
bool nextany(const StringBase &delims)
Find next token using any of given delimiters.
Definition: strtok.h:1072
const char * str_scan_endq_r(const char *&startq, const char *&endq, const char *str, const char *end, char delim)
Scan string pointer in reverse and extract quoted or unquoted text with a delimiter and return new end pointer.
Definition: strscan.h:1362
const char * str_scan_delim(const char *str, const char *end, char delim1, char delim2)
Scan string pointer for next delimiter and return stop pointer.
Definition: strscan.h:1037
StrTokVariant(const ThisType &src)
Definition: strtok.h:1408
Size skipws()
Advance current position for next token by skipping whitespace.
Definition: strtok.h:438
Evo SubString container.
StrTokLine()
Default constructor creates empty tokenizer.
Definition: strtok.h:1658
StrTokBase BaseType
Base type.
Definition: strtok.h:535
bool nextw(char delim)
Find next token using word delimiter (in reverse order).
Definition: strtok.h:691
Nullable< T > & set()
Set as null.
Definition: type.h:342
StrTokVariant(const StringBase &str)
Definition: strtok.h:1412
bool next()
Find next token by finding next newline or newline pair.
Definition: strtok.h:1705
static C::Size split(C &items, const T &str, char delim=',')
Split delimited string into item list using next().
Definition: strtok.h:459
ThisType & reset()
Reset to tokenize from beginning of string.
Definition: strtok.h:983
static SubString splitat(const T &str, Size index)
Split string lines to extract token at line index.
Definition: strtok.h:1766
Basic character type (char) – see CharT.
Definition: type.h:775
ThisType & operator=(const ThisType &src)
Assignment/Copy operator.
Definition: strtok.h:1674
ThisType & operator=(const StringBase &str)
Assignment operator to start tokenizing given string from beginning.
Definition: strtok.h:1682
StrTok(const StringBase &str)
Constructor to start tokenizing given string.
Definition: strtok.h:139
ThisType & operator=(const StringBase &str)
Assignment operator to start tokenizing given string from beginning.
Definition: strtok.h:160
ThisType & reset()
Reset to tokenize from beginning of string.
Definition: strtok.h:171
StrTokLine(const ThisType &src)
Copy constructor.
Definition: strtok.h:1662
String tokenizer adapter used internally to create variants of existing tokenizers – do not use directly.
Definition: strtok.h:1401
SubString::Size Size
String size type
Definition: strtok.h:116
StrTokRS ThisType
This type.
Definition: strtok.h:1178
StrTokVariant< StrTokR,&StrTokR::nextq,&StrTokR::nextanyq > StrTokQR
String reverse tokenizer based on StrTokR with quoted token support.
Definition: strtok.h:1501
StrTok(const BaseType &src)
Copy constructor.
Definition: strtok.h:131
StrTokBase BaseType
Base type.
Definition: strtok.h:1179
StrTokBase()
Default constructor creates empty tokenizer.
Definition: strtok.h:52
SubString::Size Size
String size type
Definition: strtok.h:25
ThisType & operator=(const BaseType &src)
Assignment/Copy operator.
Definition: strtok.h:1678
Key findany(const char *chars, Size count, Key start=0, Key end=END) const
Find first occurrence of any given characters with forward search.
Definition: substring.h:828
ThisType & operator=(const ThisType &src)
Assignment/Copy operator.
Definition: strtok.h:958
bool next(char delim)
Find next token using delimiter.
Definition: strtok.h:994
StrTok()
Default constructor creates empty tokenizer.
Definition: strtok.h:119
String line tokenizer.
Definition: strtok.h:1651
StrTokR(const ThisType &src)
Copy constructor.
Definition: strtok.h:545
StrTokBase BaseType
Base type.
Definition: strtok.h:24
StrTokRS()
Default constructor creates empty tokenizer.
Definition: strtok.h:1183
static SubString splitat(const T &str, Size index, char delim=',')
Split delimited string to extract token at index (in reverse order).
Definition: strtok.h:1380
static C::Size split(C &items, const S &str, char delim=',')
Definition: strtok.h:1428
StrTokVariant(const BaseType &src)
Definition: strtok.h:1410
Size index() const
Get current index before next token.
Definition: strtok.h:30
StrTokR(const StringBase &str)
Constructor to start tokenizing given string.
Definition: strtok.h:557
ThisType & operator=(const StringBase &str)
Assignment operator to start tokenizing given string from beginning.
Definition: strtok.h:972
StrTokBase BaseType
Base type.
Definition: strtok.h:929
bool nextw(char delim)
Find next token using word delimiter.
Definition: strtok.h:1031
StrSizeT Size
List size integer type.
Definition: sublist.h:150
uint32 StrSizeT
Default Evo string size type.
Definition: sys.h:734
const char * string_memrchr(const char *str, char ch, size_t size)
Evo implementation of memrchr() to search for character in reverse.
Definition: str.h:1150
SubString::Size Size
String size type.
Definition: strtok.h:1180
bool nextw(char delim)
Find next token using word delimiter (in reverse order).
Definition: strtok.h:1280
bool next(char delim)
Find next token using delimiter (in reverse order).
Definition: strtok.h:600
ThisType & operator=(const BaseType &src)
Assignment/Copy operator.
Definition: strtok.h:153
bool nextq(char delim)
Find next token using delimiter with quoting support.
Definition: strtok.h:231
ThisType & operator=(const ThisType &src)
Definition: strtok.h:1415
SubString & stripl()
Strip left (beginning) whitespace (spaces and tabs).
Definition: substring.h:1090
bool nextanyq(const StringBase &delims)
Find next token using any of given delimiters with quoting support.
Definition: strtok.h:428
SubString & stripr()
Strip right (ending) whitespace (spaces and tabs).
Definition: substring.h:1135
SubString::Size Size
String size type.
Definition: strtok.h:1655
static const EndT END
Special integer value for indicating end of items or no item.
Definition: type.h:1846
SubString value_
Current value.
Definition: strtok.h:49
bool nextanyq(const StringBase &delims)
Find next token using any of given delimiters (in reverse order) with quoting support.
Definition: strtok.h:838
StrTokS(const ThisType &src)
Copy constructor.
Definition: strtok.h:939
SubString string_
String being tokenized, NULL for none.
Definition: strtok.h:46
void copy(const BaseType &src)
Copy data.
Definition: strtok.h:72
StrTokBase(const BaseType &src)
Copy constructor.
Definition: strtok.h:64
const SubString & value() const
Get current token value from last call to next().
Definition: strtok.h:42
const char * str_scan_nws_r(const char *str, const char *end)
Scan string pointer for next non-whitespace character in reverse and return new end after stop pointer.
Definition: strscan.h:896
StrTokBase BaseType
Base type.
Definition: strtok.h:1654
const char * str_scan_endq(const char *&startq, const char *&endq, const char *str, const char *end)
Scan string pointer and extract quoted or unquoted text.
Definition: strscan.h:1172
StrTokVariant< StrTokR,&StrTokR::nextw > StrTokWordR
String reverse word tokenizer based on StrTokR.
Definition: strtok.h:1559
bool nextany(const StringBase &delims)
Find next token using any of given delimiters (in reverse order).
Definition: strtok.h:1319
StrTokBase BaseType
Root base type.
Definition: strtok.h:1403
bool nextanyq(const StringBase &delims, char ws_delim)
Find next token using any of given delimiters with quoting support.
Definition: strtok.h:369
static C::Size split(C &items, const T &str, char delim=',')
Split delimited string into item list using next() (in reverse order).
Definition: strtok.h:854
String reverse tokenizer (strict).
Definition: strtok.h:1176
ThisType & reset()
Reset to tokenize from beginning of string.
Definition: strtok.h:1691
bool nextanyq(const StringBase &delims, char ws_delim)
Find next token using any of given delimiters (in reverse order) with quoting support.
Definition: strtok.h:779
bool next(char delim)
Definition: strtok.h:1422
ThisType & operator=(const BaseType &src)
Definition: strtok.h:1417
ThisType & operator=(const ThisType &src)
Assignment/Copy operator.
Definition: strtok.h:146
bool nextw(char delim)
Find next token using word delimiter.
Definition: strtok.h:278
Evo C++ Library namespace.
Definition: alg.h:11
static const EndT NONE
Special integer value for indicating no item or unknown item.
Definition: type.h:1832
StrTokBase BaseType
Base type.
Definition: strtok.h:115
String forward tokenizer (strict).
Definition: strtok.h:926
ThisType & reset()
Reset to tokenize from beginning of string.
Definition: strtok.h:1233
SubString::Size Size
String size type.
Definition: strtok.h:1404
StrTokVariant< T, NextCh, NextAny > ThisType
This type.
Definition: strtok.h:1402
SubString::Size Size
String size type.
Definition: strtok.h:930
ThisType & operator=(const StringBase &str)
Assignment operator to start tokenizing given string from end.
Definition: strtok.h:1222
ThisType & operator=(const BaseType &src)
Assignment/Copy operator.
Definition: strtok.h:1215
ThisType & operator=(const StringBase &str)
Definition: strtok.h:1419
StrTokVariant< StrTok,&StrTok::nextq,&StrTok::nextanyq > StrTokQ
String forward tokenizer based on StrTok with quoted token support.
Definition: strtok.h:1474
bool nextany(const StringBase &delims)
Find next token using any of given delimiters (in reverse order).
Definition: strtok.h:732
bool nextany(const StringBase &delims)
Find next token using any of given delimiters.
Definition: strtok.h:321
String reverse tokenizer.
Definition: strtok.h:532
Char delim() const
Get current delimiter before next token.
Definition: strtok.h:36
SubString & strip()
Strip left (beginning) and right (ending) whitespace (spaces and tabs).
Definition: substring.h:1061
ThisType & operator=(const StringBase &str)
Assignment operator to start tokenizing given string from end.
Definition: strtok.h:578
bool next(char delim)
Find next token using delimiter.
Definition: strtok.h:184
SubString & setempty()
Set as empty but not null.
Definition: substring.h:1303
T * data_
Data pointer, NULL if null.
Definition: sys.h:979
StrTokVariant< StrTokRS,&StrTokRS::nextw > StrTokWordRS
String reverse word tokenizer based on StrTokRS (strict).
Definition: strtok.h:1615
String forward tokenizer.
Definition: strtok.h:112
StrTokLine ThisType
This type.
Definition: strtok.h:1653
static SubString splitat(const T &str, Size index, char delim=',')
Split delimited string to extract token at index (in reverse order).
Definition: strtok.h:872
bool nextany(const StringBase &delims)
Definition: strtok.h:1424
StrTokVariant< StrTokS,&StrTokS::nextw > StrTokWordS
String forward word tokenizer based on StrTokS (strict).
Definition: strtok.h:1587
StrTokRS(const ThisType &src)
Copy constructor.
Definition: strtok.h:1189
bool next(char delim)
Find next token using delimiter (in reverse order).
Definition: strtok.h:1244
ThisType & operator=(const BaseType &src)
Assignment/Copy operator.
Definition: strtok.h:571
SubString::Size Size
String size type.
Definition: strtok.h:536
TSize size_
Data size as item count, 0 if empty or null.
Definition: sys.h:980
StrTokR ThisType
This type.
Definition: strtok.h:534
ThisType & reset()
Reset to tokenize from beginning of string.
Definition: strtok.h:589
ThisType & operator=(const ThisType &src)
Assignment/Copy operator.
Definition: strtok.h:1208
StrTokRS(const StringBase &str)
Constructor to start tokenizing given string.
Definition: strtok.h:1201
static C::Size split(C &items, const T &str, char delim=',')
Split delimited string into item list using next() (in reverse order).
Definition: strtok.h:1362
const char * str_scan_delim_r(const char *str, const char *end, char delim1, char delim2)
Scan string pointer for next delimiter in reverse and return new end after stop pointer.
Definition: strscan.h:1058
StrTokS()
Default constructor creates empty tokenizer.
Definition: strtok.h:933
bool nextq(char delim)
Find next token using delimiter (in reverse order) with quoting support.
Definition: strtok.h:644
static SubString splitat(const S &str, Size index, char delim=',')
Definition: strtok.h:1437
StrTokVariant< StrTok,&StrTok::nextw > StrTokWord
String forward word tokenizer based on StrTok.
Definition: strtok.h:1531
StrTokS(const StringBase &str)
Constructor to start tokenizing given string.
Definition: strtok.h:951
SubString & set(const char *data)
Set as reference to terminated string.
Definition: substring.h:353
StrTokS(const BaseType &src)
Copy constructor.
Definition: strtok.h:945
Char delim_
Current delimiter, null when none or at end.
Definition: strtok.h:48
Base tokenizer class – see StrTok and StrTokR.
Definition: strtok.h:22
Reference and access existing string data.
Definition: substring.h:229
static C::Size split(C &items, const T &str, char delim=',')
Split delimited string into item list using next().
Definition: strtok.h:1116
static SubString splitat(const T &str, Size index, char delim=',')
Split delimited string to extract token at index.
Definition: strtok.h:1134
Base for all Evo list types (used internally).
Definition: sys.h:976
Size index_
Current index, END when at end.
Definition: strtok.h:47
StrTokBase(const StringBase &string)
Constructor to initialize tokenizer with given string.
Definition: strtok.h:56
ThisType & operator=(const BaseType &src)
Assignment/Copy operator.
Definition: strtok.h:965
StrTok ThisType
This type.
Definition: strtok.h:114
StrTokR(const BaseType &src)
Copy constructor.
Definition: strtok.h:551
static SubString splitat(const T &str, Size index, char delim=',')
Split delimited string to extract token at index.
Definition: strtok.h:477
StrTokRS(const BaseType &src)
Copy constructor.
Definition: strtok.h:1195