libfilezilla
Loading...
Searching...
No Matches
string.hpp
Go to the documentation of this file.
1#ifndef LIBFILEZILLA_STRING_HEADER
2#define LIBFILEZILLA_STRING_HEADER
3
#include "libfilezilla.hpp"

#include <algorithm>
#include <cstdint>
#include <limits>
#include <string>
#include <string_view>
#include <vector>
11
19namespace fz {
20
#ifdef FZ_WINDOWS
/// A string in the system's native character type and encoding.
/// On Windows this is a wide string.
using native_string = std::wstring;
/// Non-owning view matching native_string.
using native_string_view = std::wstring_view;
#endif
#if defined(FZ_UNIX) || defined(FZ_MAC)
/// A string in the system's native character type and encoding.
/// On Unix-like systems and macOS this is a narrow string.
using native_string = std::string;
/// Non-owning view matching native_string.
using native_string_view = std::string_view;
#endif
41
46native_string FZ_PUBLIC_SYMBOL to_native(std::string_view const& in);
47
52native_string FZ_PUBLIC_SYMBOL to_native(std::wstring_view const& in);
53
55template<typename T, typename std::enable_if_t<std::is_same_v<native_string, typename std::decay_t<T>>, int> = 0>
56inline native_string to_native(T const& in) {
57 return in;
58}
59
66int FZ_PUBLIC_SYMBOL stricmp(std::string_view const& a, std::string_view const& b);
67int FZ_PUBLIC_SYMBOL stricmp(std::wstring_view const& a, std::wstring_view const& b);
68
/// \brief Converts ASCII uppercase characters to lowercase as if C-locale is used.
///
/// Characters outside 'A'..'Z' are returned unchanged.
template<typename Char>
Char tolower_ascii(Char c) {
	if (c >= 'A' && c <= 'Z') {
		// The arithmetic is performed in int; convert back explicitly.
		return static_cast<Char>(c + ('a' - 'A'));
	}
	return c;
}
93
94template<>
95std::wstring::value_type FZ_PUBLIC_SYMBOL tolower_ascii(std::wstring::value_type c);
96
/// \brief Converts ASCII lowercase characters to uppercase as if C-locale is used.
///
/// Characters outside 'a'..'z' are returned unchanged.
template<typename Char>
Char toupper_ascii(Char c) {
	if (c >= 'a' && c <= 'z') {
		// The arithmetic is performed in int; convert back explicitly.
		return static_cast<Char>(c + ('A' - 'a'));
	}
	return c;
}
105
106template<>
107std::wstring::value_type FZ_PUBLIC_SYMBOL toupper_ascii(std::wstring::value_type c);
108
111 // Note: For UTF-8 strings it works on individual octets!
112std::string FZ_PUBLIC_SYMBOL str_tolower_ascii(std::string_view const& s);
113std::wstring FZ_PUBLIC_SYMBOL str_tolower_ascii(std::wstring_view const& s);
114
115std::string FZ_PUBLIC_SYMBOL str_toupper_ascii(std::string_view const& s);
116std::wstring FZ_PUBLIC_SYMBOL str_toupper_ascii(std::wstring_view const& s);
117
123struct FZ_PUBLIC_SYMBOL less_insensitive_ascii final
124{
125 template<typename T>
126 bool operator()(T const& lhs, T const& rhs) const {
127 return std::lexicographical_compare(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend(),
128 [](typename T::value_type const& a, typename T::value_type const& b) {
129 return tolower_ascii(a) < tolower_ascii(b);
130 }
131 );
132 }
133};
134
139inline bool equal_insensitive_ascii(std::string_view a, std::string_view b)
140{
141 return std::equal(a.cbegin(), a.cend(), b.cbegin(), b.cend(),
142 [](auto const& a, auto const& b) {
143 return tolower_ascii(a) == tolower_ascii(b);
144 }
145 );
146}
147inline bool equal_insensitive_ascii(std::wstring_view a, std::wstring_view b)
148{
149 return std::equal(a.cbegin(), a.cend(), b.cbegin(), b.cend(),
150 [](auto const& a, auto const& b) {
151 return tolower_ascii(a) == tolower_ascii(b);
152 }
153 );
154}
155
160std::wstring FZ_PUBLIC_SYMBOL to_wstring(std::string_view const& in);
161
/// \brief Pass-through overload for anything std::wstring can be constructed from.
///
/// The trailing return type removes this overload from overload resolution
/// when std::wstring(in) is not a valid expression.
template <typename T>
inline auto to_wstring(T && in) -> decltype(std::wstring(std::forward<T>(in)))
{
	return std::wstring(std::forward<T>(in));
}
171
/// \brief Converts arithmetic values to std::wstring by forwarding to std::to_wstring.
///
/// Only participates in overload resolution if the decayed argument type is arithmetic.
template<typename Arg>
inline std::enable_if_t<std::is_arithmetic_v<std::decay_t<Arg>>, std::wstring> to_wstring(Arg && arg)
{
	return std::to_wstring(std::forward<Arg>(arg));
}
178
179
184std::wstring FZ_PUBLIC_SYMBOL to_wstring_from_utf8(std::string_view const& in);
185std::wstring FZ_PUBLIC_SYMBOL to_wstring_from_utf8(char const* s, size_t len);
186
187class buffer;
188std::wstring FZ_PUBLIC_SYMBOL to_wstring_from_utf8(fz::buffer const& in);
189
194std::string FZ_PUBLIC_SYMBOL to_string(std::wstring_view const& in);
195
/// \brief Pass-through overload for anything std::string can be constructed from.
///
/// The trailing return type removes this overload from overload resolution
/// when std::string(in) is not a valid expression.
template <typename T>
inline auto to_string(T && in) -> decltype(std::string(std::forward<T>(in)))
{
	return std::string(std::forward<T>(in));
}
205
206
/// \brief Converts arithmetic values to std::string by forwarding to std::to_string.
///
/// Only participates in overload resolution if the decayed argument type is arithmetic.
template<typename Arg>
inline std::enable_if_t<std::is_arithmetic_v<std::decay_t<Arg>>, std::string> to_string(Arg && arg)
{
	return std::to_string(std::forward<Arg>(arg));
}
213
214
/// \brief Returns length of 0-terminated character sequence. Works with both narrow and wide-characters.
///
/// \param str Pointer to a 0-terminated sequence; must not be null.
template<typename Char>
size_t strlen(Char const* str) {
	return std::char_traits<Char>::length(str);
}
220
221
228std::string FZ_PUBLIC_SYMBOL to_utf8(std::string_view const& in);
229
236std::string FZ_PUBLIC_SYMBOL to_utf8(std::wstring_view const& in);
237
/// \brief Calls either fz::to_string or fz::to_wstring depending on the passed template argument.
///
/// This overload is enabled when String is std::string.
template<typename String, typename Arg>
inline auto toString(Arg&& arg) -> typename std::enable_if<std::is_same_v<String, std::string>, decltype(to_string(std::forward<Arg>(arg)))>::type
{
	return to_string(std::forward<Arg>(arg));
}

/// Counterpart enabled when String is std::wstring; forwards to to_wstring.
template<typename String, typename Arg>
inline auto toString(Arg&& arg) -> typename std::enable_if<std::is_same_v<String, std::wstring>, decltype(to_wstring(std::forward<Arg>(arg)))>::type
{
	return to_wstring(std::forward<Arg>(arg));
}
250
#if !defined(fzT) || defined(DOXYGEN)
#ifdef FZ_WINDOWS
/// Turns a string literal into a wide string literal by prepending L (Windows build).
#define fzT(x) L ## x
#else
/// Expands to the string literal unchanged (non-Windows build).
#define fzT(x) x
#endif
#endif
266
/// \brief Returns the function argument of the type matching the template argument.
///
/// Only the char and wchar_t specializations below are defined.
template<typename Char>
constexpr Char const* choose_string(char const* c, wchar_t const* w);

/// Narrow specialization: returns the narrow argument.
template<> constexpr inline char const* choose_string<char>(char const* c, wchar_t const*) { return c; }
/// Wide specialization: returns the wide argument.
template<> constexpr inline wchar_t const* choose_string<wchar_t>(char const*, wchar_t const* w) { return w; }
273
#if !defined(fzS) || defined(DOXYGEN)
/// Yields the string literal s as a `Char const*`: both the narrow and the
/// L-prefixed wide form of the literal are passed to fz::choose_string<Char>,
/// which returns the one matching Char.
#define fzS(Char, s) fz::choose_string<Char>(s, L ## s)
#endif
288
293std::string FZ_PUBLIC_SYMBOL replaced_substrings(std::string_view const& in, std::string_view const& find, std::string_view const& replacement);
294std::wstring FZ_PUBLIC_SYMBOL replaced_substrings(std::wstring_view const& in, std::wstring_view const& find, std::wstring_view const& replacement);
295
297std::string FZ_PUBLIC_SYMBOL replaced_substrings(std::string_view const& in, char find, char replacement);
298std::wstring FZ_PUBLIC_SYMBOL replaced_substrings(std::wstring_view const& in, wchar_t find, wchar_t replacement);
299
304bool FZ_PUBLIC_SYMBOL replace_substrings(std::string& in, std::string_view const& find, std::string_view const& replacement);
305bool FZ_PUBLIC_SYMBOL replace_substrings(std::wstring& in, std::wstring_view const& find, std::wstring_view const& replacement);
306
308bool FZ_PUBLIC_SYMBOL replace_substrings(std::string& in, char find, char replacement);
309bool FZ_PUBLIC_SYMBOL replace_substrings(std::wstring& in, wchar_t find, wchar_t replacement);
310
/// \brief Container-like class that can be used to iterate over tokens in a string.
///
/// Tokens are handed out as string views into the tokenized string; nothing
/// is copied while iterating. begin() returns an input iterator, end() a
/// sentinel, so the class is usable in a range-based for loop.
///
/// \tparam String type used to store the string to tokenize. If it is a
///         reference type, only a reference is stored and the referenced
///         string has to outlive the tokenizer.
/// \tparam Delims type used to store the set of delimiter characters; it is
///         passed as-is to basic_string_view::find_first_of.
template <typename String, typename Delims>
class strtokenizer
{
	// View over the character type obtained by indexing String.
	using view_type = std::basic_string_view<std::decay_t<decltype(std::declval<String>()[0])>>;

public:
	/// \brief strtokenizer class constructor.
	///
	/// \param string The string to tokenize.
	/// \param delims Each character in it separates two tokens.
	/// \param ignore_empty If true, empty tokens are skipped.
	constexpr strtokenizer(String && string, Delims &&delims, bool ignore_empty)
		: string_(std::forward<String>(string))
		, delims_(std::forward<Delims>(delims))
		, ignore_empty_(ignore_empty)
	{}

	using value_type = const view_type;
	using pointer = value_type*;
	using reference = value_type&;
	using size_type = std::size_t;
	using difference_type = std::ptrdiff_t;

	/// End marker; an iterator compares equal to it once no input is left.
	struct sentinel{};

	/// Input iterator yielding one token per step.
	struct iterator
	{
		using iterator_category = std::input_iterator_tag;
		using difference_type = strtokenizer::difference_type;
		using value_type = strtokenizer::value_type;
		using pointer = strtokenizer::pointer;
		using reference = strtokenizer::reference;

		constexpr bool operator !=(sentinel) const
		{
			return !s_.empty();
		}

		constexpr bool operator ==(sentinel) const
		{
			return s_.empty();
		}

		// Two iterators are compared by the amount of remaining input only.
		constexpr bool operator ==(iterator const& op) const
		{
			return s_.size() == op.s_.size();
		}

		constexpr bool operator !=(iterator const& op) const
		{
			return s_.size() != op.s_.size();
		}

		/// Returns the current token: the first pos_ characters of the remaining input.
		constexpr value_type operator*() const
		{
			return s_.substr(0, pos_);
		}

		/// Drops the current token and its delimiter, then locates the end of the next token.
		constexpr iterator &operator++()
		{
			for (;;) {
				// Step over the delimiter following the current token.
				// On the initial call from the constructor pos_ is npos, which wraps around to 0 here.
				if (pos_ != s_.size()) {
					++pos_;
				}

				s_.remove_prefix(pos_);

				pos_ = s_.find_first_of(t_->delims_);

				if (pos_ == view_type::npos) {
					// No further delimiter: the rest of the input is the last token.
					pos_ = s_.size();
					break;
				}

				// An empty token (pos_ == 0) is only reported if empty tokens are wanted.
				if (pos_ != 0 || !t_->ignore_empty_) {
					break;
				}
			}

			return *this;
		}

	private:
		friend strtokenizer;

		constexpr iterator(const strtokenizer *t)
			: t_(t)
			, s_(view_type(t_->string_))
			, pos_(view_type::npos)
		{
			// Position on the first token.
			operator++();
		}

		const strtokenizer *t_; // Tokenizer this iterator belongs to
		view_type s_;           // Input not yet consumed, starting at the current token
		size_type pos_;         // Length of the current token within s_
	};

	using const_value_type = value_type;
	using const_pointer = pointer;
	using const_reference = reference;
	using const_iterator = iterator;

	constexpr iterator begin() const
	{
		return { this };
	}

	constexpr sentinel end() const
	{
		return {};
	}

	constexpr const_iterator cbegin() const
	{
		return { this };
	}

	constexpr sentinel cend() const
	{
		return {};
	}

public:
	String string_;
	Delims delims_;
	bool ignore_empty_;
};
466
473template <typename String, typename Delims>
474strtokenizer(String && string, Delims &&delims, bool ignore_empty) -> strtokenizer<String, Delims>;
475
482std::vector<std::string> FZ_PUBLIC_SYMBOL strtok(std::string_view const& tokens, std::string_view const& delims, bool const ignore_empty = true);
483std::vector<std::wstring> FZ_PUBLIC_SYMBOL strtok(std::wstring_view const& tokens, std::wstring_view const& delims, bool const ignore_empty = true);
484inline auto FZ_PUBLIC_SYMBOL strtok(std::string_view const& tokens, char const delim, bool const ignore_empty = true) {
485 return strtok(tokens, std::string_view(&delim, 1), ignore_empty);
486}
487inline auto FZ_PUBLIC_SYMBOL strtok(std::wstring_view const& tokens, wchar_t const delim, bool const ignore_empty = true) {
488 return strtok(tokens, std::wstring_view(&delim, 1), ignore_empty);
489}
490
499std::vector<std::string_view> FZ_PUBLIC_SYMBOL strtok_view(std::string_view const& tokens, std::string_view const& delims, bool const ignore_empty = true);
500std::vector<std::wstring_view> FZ_PUBLIC_SYMBOL strtok_view(std::wstring_view const& tokens, std::wstring_view const& delims, bool const ignore_empty = true);
501inline auto FZ_PUBLIC_SYMBOL strtok_view(std::string_view const& tokens, char const delim, bool const ignore_empty = true) {
502 return strtok_view(tokens, std::string_view(&delim, 1), ignore_empty);
503}
504inline auto FZ_PUBLIC_SYMBOL strtok_view(std::wstring_view const& tokens, wchar_t const delim, bool const ignore_empty = true) {
505 return strtok_view(tokens, std::wstring_view(&delim, 1), ignore_empty);
506}
507
/// \brief Implementation of to_integral: parses an optionally signed decimal number.
///
/// Accepted input is an optional leading '+' or '-' followed by at least one
/// digit '0'..'9' and nothing else.
///
/// - bool is parsed as unsigned int and compared against 0.
/// - Enumerations are parsed as their underlying type.
/// - For unsigned types a leading '-' yields the modular negation of the
///   parsed magnitude (kept for backwards compatibility).
///
/// \param s String-like object offering cbegin()/cend().
/// \param errorval Returned if \c s is empty, consists of a sign only,
///        contains a non-digit, or if the value does not fit into T.
/// \return The parsed value, or \c errorval.
template<typename T, typename String>
T to_integral_impl(String const& s, T const errorval = T())
{
	if constexpr (std::is_same_v<T, bool>) {
		return to_integral_impl<unsigned int>(s, static_cast<unsigned int>(errorval)) != 0;
	}
	else if constexpr (std::is_enum_v<T>) {
		using underlying = std::underlying_type_t<T>;
		return static_cast<T>(to_integral_impl<underlying>(s, static_cast<underlying>(errorval)));
	}
	else {
		auto it = s.cbegin();
		auto const end = s.cend();

		bool negative = false;
		if (it != end && (*it == '-' || *it == '+')) {
			negative = (*it == '-');
			++it;
		}

		if (it == end) {
			return errorval;
		}

		// Parenthesized to stay clear of function-like min/max macros.
		constexpr T upper = (std::numeric_limits<T>::max)();
		constexpr T lower = (std::numeric_limits<T>::lowest)();

		T ret{};
		for (; it != end; ++it) {
			auto const& c = *it;
			if (c < '0' || c > '9') {
				return errorval;
			}
			T const digit = static_cast<T>(c - '0');

			if constexpr (std::is_signed_v<T>) {
				if (negative) {
					// Accumulate downwards so that the most negative value is
					// representable without ever overflowing.
					if (ret < lower / 10) {
						return errorval;
					}
					ret = static_cast<T>(ret * 10);
					if (ret < lower + digit) {
						return errorval;
					}
					ret = static_cast<T>(ret - digit);
					continue;
				}
			}

			// Check before each step instead of detecting overflow afterwards.
			if (ret > upper / 10) {
				return errorval;
			}
			ret = static_cast<T>(ret * 10);
			if (ret > upper - digit) {
				return errorval;
			}
			ret = static_cast<T>(ret + digit);
		}

		if constexpr (!std::is_signed_v<T>) {
			if (negative) {
				// Well-defined modular negation.
				ret = static_cast<T>(T{} - ret);
			}
		}
		return ret;
	}
}
544
546template<typename T>
547T to_integral(std::string_view const& s, T const errorval = T()) {
548 return to_integral_impl<T>(s, errorval);
549}
550
551template<typename T>
552T to_integral(std::wstring_view const& s, T const errorval = T()) {
553 return to_integral_impl<T>(s, errorval);
554}
555
556template<typename T, typename StringType>
557T to_integral(std::basic_string_view<StringType> const& s, T const errorval = T()) {
558 return to_integral_impl<T>(s, errorval);
559}
560
561
/// \brief Returns true iff the string only has characters in the 7-bit ASCII range.
///
/// An empty string yields true.
template<typename String>
bool str_is_ascii(String const& s) {
	for (auto const& c : s) {
		// Compare as unsigned so that negative values of a signed character type count as > 127.
		if (static_cast<std::make_unsigned_t<typename String::value_type>>(c) > 127) {
			return false;
		}
	}

	return true;
}
573
/// \brief Implementation shared by the trim functions.
///
/// Removes characters contained in \c chars from the start and/or the end of
/// \c s, in place. Works with any string-like type offering
/// find_first_not_of, find_last_not_of, substr, size and npos.
///
/// \param s String (or string view) to modify.
/// \param chars Set of characters to remove.
/// \param fromLeft Whether to strip at the beginning.
/// \param fromRight Whether to strip at the end.
template<typename String, typename Chars>
void trim_impl(String & s, Chars const& chars, bool fromLeft, bool fromRight) {
	size_t const first = fromLeft ? s.find_first_not_of(chars) : 0;
	if (first == String::npos) {
		// Nothing but characters to strip
		s = String();
		return;
	}

	// If the right side is not stripped, last is one past the final character.
	// The resulting count below is then too large by one, which substr clamps.
	size_t const last = fromRight ? s.find_last_not_of(chars) : s.size();
	if (last == String::npos) {
		s = String();
		return;
	}

	// Invariant: If first exists, then last >= first
	s = s.substr(first, last - first + 1);
}
592
594inline std::string FZ_PUBLIC_SYMBOL trimmed(std::string_view s, std::string_view const& chars = " \r\n\t", bool fromLeft = true, bool fromRight = true)
595{
596 trim_impl(s, chars, fromLeft, fromRight);
597 return std::string(s);
598}
599
600inline std::wstring FZ_PUBLIC_SYMBOL trimmed(std::wstring_view s, std::wstring_view const& chars = L" \r\n\t", bool fromLeft = true, bool fromRight = true)
601{
602 trim_impl(s, chars, fromLeft, fromRight);
603 return std::wstring(s);
604}
605
606inline std::string FZ_PUBLIC_SYMBOL ltrimmed(std::string_view s, std::string_view const& chars = " \r\n\t")
607{
608 trim_impl(s, chars, true, false);
609 return std::string(s);
610}
611
612inline std::wstring FZ_PUBLIC_SYMBOL ltrimmed(std::wstring_view s, std::wstring_view const& chars = L" \r\n\t")
613{
614 trim_impl(s, chars, true, false);
615 return std::wstring(s);
616}
617
618inline std::string FZ_PUBLIC_SYMBOL rtrimmed(std::string_view s, std::string_view const& chars = " \r\n\t")
619{
620 trim_impl(s, chars, false, true);
621 return std::string(s);
622}
623
624inline std::wstring FZ_PUBLIC_SYMBOL rtrimmed(std::wstring_view s, std::wstring_view const& chars = L" \r\n\t")
625{
626 trim_impl(s, chars, false, true);
627 return std::wstring(s);
628}
629
630
/// \brief Remove all leading and trailing whitespace from string.
///
/// Modifies \c s in place. This overload is selected for strings whose value_type is char.
///
/// \param s The string to modify.
/// \param chars The characters to strip, by default space, CR, LF and tab.
/// \param fromLeft Whether to strip at the beginning.
/// \param fromRight Whether to strip at the end.
template<typename String, typename std::enable_if_t<std::is_same_v<typename String::value_type, char>, int> = 0>
inline void trim(String & s, std::string_view const& chars = " \r\n\t", bool fromLeft = true, bool fromRight = true)
{
	trim_impl(s, chars, fromLeft, fromRight);
}

/// Overload of trim for strings whose value_type is wchar_t.
template<typename String, typename std::enable_if_t<std::is_same_v<typename String::value_type, wchar_t>, int> = 0>
inline void trim(String & s, std::wstring_view const& chars = L" \r\n\t", bool fromLeft = true, bool fromRight = true)
{
	trim_impl(s, chars, fromLeft, fromRight);
}

/// Like trim, but only strips at the beginning of the string.
template<typename String, typename std::enable_if_t<std::is_same_v<typename String::value_type, char>, int> = 0>
inline void ltrim(String& s, std::string_view const& chars = " \r\n\t")
{
	trim_impl(s, chars, true, false);
}

/// Overload of ltrim for strings whose value_type is wchar_t.
template<typename String, typename std::enable_if_t<std::is_same_v<typename String::value_type, wchar_t>, int> = 0>
inline void ltrim(String& s, std::wstring_view const& chars = L" \r\n\t")
{
	trim_impl(s, chars, true, false);
}

/// Like trim, but only strips at the end of the string.
template<typename String, typename std::enable_if_t<std::is_same_v<typename String::value_type, char>, int> = 0>
inline void rtrim(String& s, std::string_view const& chars = " \r\n\t")
{
	trim_impl(s, chars, false, true);
}

/// Overload of rtrim for strings whose value_type is wchar_t.
template<typename String, typename std::enable_if_t<std::is_same_v<typename String::value_type, wchar_t>, int> = 0>
inline void rtrim(String & s, std::wstring_view const& chars = L" \r\n\t")
{
	trim_impl(s, chars, false, true);
}
667
/// \brief Tests whether the first string starts with the second string.
///
/// \tparam insensitive_ascii If true, characters are compared after
///         tolower_ascii has been applied to them.
/// \param s The string to inspect.
/// \param beginning The expected prefix; an empty prefix always matches.
template<bool insensitive_ascii = false, typename String>
bool starts_with(String const& s, String const& beginning)
{
	// Also guarantees that the three-iterator std::equal below stays inside s.
	if (beginning.size() > s.size()) {
		return false;
	}
	if constexpr (insensitive_ascii) {
		return std::equal(beginning.begin(), beginning.end(), s.begin(), [](typename String::value_type const& a, typename String::value_type const& b) {
			return tolower_ascii(a) == tolower_ascii(b);
		});
	}
	else {
		return std::equal(beginning.begin(), beginning.end(), s.begin());
	}
}
687
/// \brief Tests whether the first string ends with the second string.
///
/// \tparam insensitive_ascii If true, characters are compared after
///         tolower_ascii has been applied to them.
/// \param s The string to inspect.
/// \param ending The expected suffix; an empty suffix always matches.
template<bool insensitive_ascii = false, typename String>
bool ends_with(String const& s, String const& ending)
{
	// Also guarantees that the three-iterator std::equal below stays inside s.
	if (ending.size() > s.size()) {
		return false;
	}

	// Compare back to front using reverse iterators.
	if constexpr (insensitive_ascii) {
		return std::equal(ending.rbegin(), ending.rend(), s.rbegin(), [](typename String::value_type const& a, typename String::value_type const& b) {
			return tolower_ascii(a) == tolower_ascii(b);
		});
	}
	else {
		return std::equal(ending.rbegin(), ending.rend(), s.rbegin());
	}
}
708
714std::string FZ_PUBLIC_SYMBOL normalize_hyphens(std::string_view const& in);
715std::wstring FZ_PUBLIC_SYMBOL normalize_hyphens(std::wstring_view const& in);
716
718bool FZ_PUBLIC_SYMBOL is_valid_utf8(std::string_view s);
719
740bool FZ_PUBLIC_SYMBOL is_valid_utf8(std::string_view s, size_t & state);
741
747void FZ_PUBLIC_SYMBOL unicode_codepoint_to_utf8_append(std::string& result, uint32_t codepoint);
748
769bool FZ_PUBLIC_SYMBOL utf16be_to_utf8_append(std::string & result, std::string_view data, uint32_t & state);
770
772bool FZ_PUBLIC_SYMBOL utf16le_to_utf8_append(std::string & result, std::string_view data, uint32_t & state);
773
774inline native_string to_native_from_utf8(std::string_view s) {
775#ifdef FZ_WINDOWS
776 return to_wstring_from_utf8(s);
777#else
779#endif
780}
781
782}
783
784#endif
The buffer class is a simple buffer where data can be appended at the end and consumed at the front....
Definition buffer.hpp:27
Small class to return filesystem errors.
Definition fsresult.hpp:26
Container-like class that can be used to iterate over tokens in a string.
Definition string.hpp:339
constexpr strtokenizer(String &&string, Delims &&delims, bool ignore_empty)
strtokenizer class constructor.
Definition string.hpp:349
Sets some global macros and further includes string.hpp.
The namespace used by libfilezilla.
Definition apply.hpp:17
size_t strlen(Char const *str)
Returns length of 0-terminated character sequence. Works with both narrow and wide-characters.
Definition string.hpp:217
Char toupper_ascii(Char c)
Converts ASCII lowercase characters to uppercase as if C-locale is used.
Definition string.hpp:99
bool utf16le_to_utf8_append(std::string &result, std::string_view data, uint32_t &state)
Just as utf16be_to_utf8_append but for little-endian UTF-16.
std::vector< std::string_view > strtok_view(std::string_view const &tokens, std::string_view const &delims, bool const ignore_empty=true)
Tokenizes string.
Char tolower_ascii(Char c)
Converts ASCII uppercase characters to lowercase as if C-locale is used.
Definition string.hpp:87
bool str_is_ascii(String const &s)
Returns true iff the string only has characters in the 7-bit ASCII range.
Definition string.hpp:564
strtokenizer(String &&string, Delims &&delims, bool ignore_empty) -> strtokenizer< String, Delims >
strtokenizer class construction-guide.
auto toString(Arg &&arg) -> typename std::enable_if< std::is_same_v< String, std::string >, decltype(to_string(std::forward< Arg >(arg)))>::type
Calls either fz::to_string or fz::to_wstring depending on the passed template argument.
Definition string.hpp:240
constexpr Char const * choose_string(char const *c, wchar_t const *w)
Returns the function argument of the type matching the template argument.
void trim(String &s, std::string_view const &chars=" \r\n\t", bool fromLeft=true, bool fromRight=true)
Remove all leading and trailing whitespace from string.
Definition string.hpp:633
bool is_valid_utf8(std::string_view s)
Verifies that the input data is valid UTF-8.
std::string trimmed(std::string_view s, std::string_view const &chars=" \r\n\t", bool fromLeft=true, bool fromRight=true)
Return passed string with all leading and trailing whitespace removed.
Definition string.hpp:594
std::wstring to_wstring_from_utf8(std::string_view const &in)
Converts from std::string in UTF-8 into std::wstring.
std::string normalize_hyphens(std::string_view const &in)
std::wstring native_string
A string in the system's native character type and encoding. Note: This typedef changes depending on...
Definition string.hpp:34
bool utf16be_to_utf8_append(std::string &result, std::string_view data, uint32_t &state)
Converts from UTF-16-BE and appends it to the passed string.
std::string to_utf8(std::string_view const &in)
Converts from std::string in native encoding into std::string in UTF-8.
bool ends_with(String const &s, String const &ending)
Tests whether the first string ends with the second string.
Definition string.hpp:693
bool equal_insensitive_ascii(std::string_view a, std::string_view b)
Locale-insensitive stricmp.
Definition string.hpp:139
std::vector< std::string > strtok(std::string_view const &tokens, std::string_view const &delims, bool const ignore_empty=true)
Tokenizes string.
std::string to_string(std::wstring_view const &in)
Converts from std::wstring into std::string in system encoding.
std::wstring to_wstring(std::string_view const &in)
Converts from std::string in system encoding into std::wstring.
std::string replaced_substrings(std::string_view const &in, std::string_view const &find, std::string_view const &replacement)
Returns in with all occurrences of find in the input string replaced with replacement.
bool starts_with(String const &s, String const &beginning)
Tests whether the first string starts with the second string.
Definition string.hpp:673
bool replace_substrings(std::string &in, std::string_view const &find, std::string_view const &replacement)
Modifies in, replacing all occurrences of find with replacement.
int stricmp(std::string_view const &a, std::string_view const &b)
Locale-sensitive stricmp.
std::string str_tolower_ascii(std::string_view const &s)
str_tolower_ascii does for strings what tolower_ascii does for individual characters.
void unicode_codepoint_to_utf8_append(std::string &result, uint32_t codepoint)
Encodes a valid Unicode codepoint as UTF-8 and appends it to the passed string.
native_string to_native(std::string_view const &in)
Converts std::string to native_string.
T to_integral(std::string_view const &s, T const errorval=T())
Converts string to integral type T. If string is not convertible, errorval is returned.
Definition string.hpp:547
Comparator to be used for std::map for case-insensitive keys.
Definition string.hpp:124
Definition string.hpp:364
Definition string.hpp:361