1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 /**
6  * UTF-8 encoded string.
7  *
8  * You can create a $(D_PSYMBOL String) from a literal string, single character
9  * or character range. Characters can be of the type $(D_KEYWORD char),
 * $(D_KEYWORD wchar) or $(D_KEYWORD dchar). Literal strings, characters and
 * character ranges can also be inserted into an existing string.
12  *
13  * $(D_PSYMBOL String) is always valid UTF-8. Inserting an invalid sequence
14  * or working on a corrupted $(D_PSYMBOL String) causes
15  * $(D_PSYMBOL UTFException) to be thrown.
16  *
17  * Internally $(D_PSYMBOL String) is represented by a sequence of
18  * $(D_KEYWORD char)s.
19  *
20  * Copyright: Eugene Wissner 2017-2020.
21  * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
22  *                  Mozilla Public License, v. 2.0).
23  * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
24  * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/container/string.d,
25  *                 tanya/container/string.d)
26  */
// Fully qualified module name; it mirrors the path tanya/container/string.d.
module tanya.container.string;
28 
29 import std.algorithm.comparison;
30 import std.algorithm.mutation : bringToFront;
31 import tanya.algorithm.mutation;
32 import tanya.hash.lookup;
33 import tanya.memory.allocator;
34 import tanya.memory.lifetime;
35 import tanya.meta.trait;
36 import tanya.meta.transform;
37 import tanya.range.array;
38 import tanya.range.primitive;
39 
/**
 * Exception type used by this module to report encoding errors, such as an
 * invalid UTF sequence.
 */
class UTFException : Exception
{
    /**
     * Creates the exception. All arguments are forwarded unchanged to the
     * constructor of $(D_PSYMBOL Exception).
     *
     * Params:
     *  msg  = Human-readable description of the error.
     *  file = Name of the file the error originates from.
     *  line = Line number the error originates from.
     *  next = Previous exception in the exception chain, or
     *         $(D_KEYWORD null) if there is none.
     */
    this(string msg,
         string file = __FILE__,
         size_t line = __LINE__,
         Throwable next = null) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }
}
60 
/**
 * Range over the UTF-8 code units (bytes) of a $(D_PSYMBOL String).
 *
 * The range is a pair of pointers into the buffer of the string it was
 * created from and additionally remembers the address of that string. The
 * invariant checks that both pointers stay inside the string's data.
 *
 * Params:
 *  E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
 */
struct ByCodeUnit(E)
if (is(Unqual!E == char))
{
    // [begin, end) is the part of the container's buffer still to be visited.
    private E* begin, end;
    private alias ContainerType = CopyConstness!(E, String);
    // The string this range iterates over; never null.
    private ContainerType* container;

    invariant
    {
        assert(this.begin <= this.end);
        assert(this.container !is null);
        // Both ends have to stay within the used part of the buffer.
        assert(this.begin >= this.container.data);
        assert(this.end <= this.container.data + this.container.length);
    }

    /*
     * Params:
     *  container = String the pointers belong to.
     *  begin     = Pointer to the first code unit of the range.
     *  end       = Pointer past the last code unit of the range.
     *
     * Precondition: begin and end delimit a part of container's data.
     */
    private this(ref ContainerType container, E* begin, E* end) @trusted
    in
    {
        assert(begin <= end);
        assert(begin >= container.data);
        assert(end <= container.data + container.length);
    }
    do
    {
        this.begin = begin;
        this.end = end;
        this.container = &container;
    }

    @disable this();

    /// Returns: A copy of this range.
    @property ByCodeUnit save()
    {
        return this;
    }

    /// Returns: $(D_KEYWORD true) if no code units are left.
    @property bool empty() const
    {
        return this.end == this.begin;
    }

    /// Returns: Number of code units left in the range.
    @property size_t length() const
    {
        return this.end - this.begin;
    }

    alias opDollar = length;

    /// Returns: Reference to the first code unit. The range must not be empty.
    @property ref inout(E) front() inout
    in
    {
        assert(!empty);
    }
    do
    {
        return *this.begin;
    }

    /// Returns: Reference to the last code unit. The range must not be empty.
    @property ref inout(E) back() inout @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        // "end" points past the last element.
        auto last = this.end - 1;
        return *last;
    }

    /// Drops the first code unit. The range must not be empty.
    void popFront() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        this.begin += 1;
    }

    /// Drops the last code unit. The range must not be empty.
    void popBack() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        this.end -= 1;
    }

    /**
     * Params:
     *  i = Zero-based position inside the range.
     *
     * Returns: Reference to the code unit at position $(D_PARAM i).
     *
     * Precondition: $(D_INLINECODE i < length).
     */
    ref inout(E) opIndex(const size_t i) inout @trusted
    in
    {
        assert(i < length);
    }
    do
    {
        return this.begin[i];
    }

    /// Returns: A new range covering the same code units as this one.
    ByCodeUnit opIndex()
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }

    /// ditto
    ByCodeUnit!(const E) opIndex() const
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }

    /**
     * Params:
     *  i = Slice start, relative to the beginning of this range.
     *  j = Slice end (exclusive), relative to the beginning of this range.
     *
     * Returns: Sub-range with the code units from $(D_PARAM i) up to
     *          (excluding) $(D_PARAM j).
     *
     * Precondition: $(D_INLINECODE i <= j && j <= length).
     */
    ByCodeUnit opSlice(const size_t i, const size_t j) @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        auto first = this.begin + i;
        auto last = this.begin + j;
        return typeof(return)(*this.container, first, last);
    }

    /// ditto
    ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        auto first = this.begin + i;
        auto last = this.begin + j;
        return typeof(return)(*this.container, first, last);
    }

    /// Returns: The code units of this range as an array slice.
    inout(E)[] get() inout @trusted
    {
        const count = length;
        return this.begin[0 .. count];
    }
}
202 
/**
 * Range over the code points of a UTF-8 encoded $(D_PSYMBOL String).
 *
 * The range is a pair of pointers into the buffer of the string it was
 * created from; $(D_PSYMBOL front) decodes the sequence at the beginning of
 * the range and $(D_PSYMBOL popFront) skips it.
 *
 * Params:
 *  E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
 */
struct ByCodePoint(E)
if (is(Unqual!E == char))
{
    // [begin, end) is the not yet consumed part of the container's buffer.
    private E* begin, end;
    private alias ContainerType = CopyConstness!(E, String);
    // The string this range iterates over; never null.
    private ContainerType* container;

    invariant
    {
        assert(this.begin <= this.end);
        assert(this.container !is null);
        // Both ends have to stay within the used part of the buffer.
        assert(this.begin >= this.container.data);
        assert(this.end <= this.container.data + this.container.length);
    }

    /*
     * Params:
     *  container = String the pointers belong to.
     *  begin     = Pointer to the first code unit of the range.
     *  end       = Pointer past the last code unit of the range.
     *
     * Precondition: begin and end delimit a part of container's data.
     */
    private this(ref ContainerType container, E* begin, E* end) @trusted
    in
    {
        assert(begin <= end);
        assert(begin >= container.data);
        assert(end <= container.data + container.length);
    }
    do
    {
        this.begin = begin;
        this.end = end;
        this.container = &container;
    }

    @disable this();

    /// Returns: A copy of this range.
    @property ByCodePoint save()
    {
        return this;
    }

    /// Returns: $(D_KEYWORD true) if no code units are left.
    @property bool empty() const
    {
        return this.end == this.begin;
    }

    /**
     * Decodes the code point at the beginning of the range without
     * consuming it.
     *
     * Returns: The decoded code point.
     *
     * Throws: $(D_PSYMBOL UTFException) if the lead byte announces more than
     *         4 code units or more code units than the range has left.
     *
     * Precondition: $(D_INLINECODE !empty).
     */
    @property dchar front() const @trusted
    in
    {
        assert(!empty);
    }
    out (chr)
    {
        // Surrogates are not valid code points.
        assert(chr < 0xd800 || chr > 0xdfff);
    }
    do
    {
        dchar point;
        ubyte units;
        int mask;
        const(char)* cursor = this.begin;

        if (*cursor & 0x80)
        {
            // Multi-byte sequence: count the leading one-bits of the first
            // byte and build the mask covering those marker bits. For a
            // single byte both "units" and "mask" remain 0.
            mask = 0xe0;
            units = 2;
            while (((*cursor << units) & 0x80) != 0)
            {
                mask = (mask >> 1) | 0x80;
                ++units;
            }
        }
        if (units > 4 || this.begin + units > this.end)
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
        }
        // Payload bits of the first byte.
        point = *cursor++ & ~mask;

        // Every following byte contributes its 6 low bits.
        while (units > 1)
        {
            point = (point << 6) | (*cursor++ & 0x3f);
            --units;
        }

        return point;
    }

    /**
     * Skips the code point at the beginning of the range.
     *
     * Throws: $(D_PSYMBOL UTFException) if the first byte is not recognized
     *         as the start of a sequence or the sequence would reach past the
     *         end of the range.
     *
     * Precondition: $(D_INLINECODE !empty).
     */
    void popFront() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        const lead = *this.begin;
        // Stays 0 if the byte is a continuation byte (10xxxxxx).
        ubyte units;

        if ((lead & 0x80) == 0)
        {
            units = 1;
        }
        else if ((lead & 0xf0) == 0xf0)
        {
            units = 4;
        }
        else if ((lead & 0xe0) == 0xe0)
        {
            units = 3;
        }
        else if ((lead & 0xc0) == 0xc0)
        {
            units = 2;
        }

        if (units == 0 || this.begin + units > this.end)
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
        }
        this.begin += units;
    }

    /// Returns: A new range covering the same part of the string.
    ByCodePoint opIndex()
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }

    /// ditto
    ByCodePoint!(const E) opIndex() const
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }
}
329 
330 /**
331  * UTF-8 string.
332  */
333 struct String
334 {
335     private size_t length_;
336     private char* data;
337     private size_t capacity_;
338 
339     @nogc nothrow pure @safe invariant
340     {
341         assert(this.length_ <= this.capacity_);
342     }
343 
344     /**
345      * Constructs the string from a stringish range.
346      *
347      * Params:
348      *  S         = String type.
349      *  str       = Initial string.
350      *  allocator = Allocator.
351      *
352      * Throws: $(D_PSYMBOL UTFException).
353      *
354      * Precondition: $(D_INLINECODE allocator is null).
355      */
356     this(S)(const S str, shared Allocator allocator = defaultAllocator)
357     if (!isInfinite!S
358      && isInputRange!S
359      && isSomeChar!(ElementType!S))
360     {
361         this(allocator);
362         insertBack(str);
363     }
364 
365     ///
366     @nogc pure @safe unittest
367     {
368         auto s = String("\u10437"w);
369         assert(s == "\u10437");
370     }
371 
372     ///
373     @nogc pure @safe unittest
374     {
375         auto s = String("Отказаться от вина - в этом страшная вина."d);
376         assert(s == "Отказаться от вина - в этом страшная вина.");
377     }
378 
379     /**
380      * Initializes this string from another one.
381      *
382      * If $(D_PARAM init) is passed by value, it won't be copied, but moved.
383      * If the allocator of ($D_PARAM init) matches $(D_PARAM allocator),
384      * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s
385      * storage, otherwise, the storage will be allocated with
386      * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end.
387      *
388      * If $(D_PARAM init) is passed by reference, it will be copied.
389      *
390      * Params:
391      *  S         = Source string type.
392      *  init      = Source string.
393      *  allocator = Allocator.
394      *
395      * Precondition: $(D_INLINECODE allocator is null).
396      */
397     this(S)(S init, shared Allocator allocator = defaultAllocator) @trusted
398     if (is(S == String))
399     {
400         this(allocator);
401         if (allocator !is init.allocator)
402         {
403             // Just steal all references and the allocator.
404             this.data = init.data;
405             this.length_ = init.length_;
406             this.capacity_ = init.capacity_;
407 
408             // Reset the source string, so it can't destroy the moved storage.
409             init.length_ = init.capacity_ = 0;
410             init.data = null;
411         }
412         else
413         {
414             reserve(init.length);
415             init.data[0 .. init.length].copy(this.data[0 .. init.length]);
416             this.length_ = init.length;
417         }
418     }
419 
420     /// ditto
421     this(S)(ref S init, shared Allocator allocator = defaultAllocator) @trusted
422     if (is(Unqual!S == String))
423     {
424         this(allocator);
425         reserve(init.length);
426         init.data[0 .. init.length].copy(this.data[0 .. init.length]);
427         this.length_ = init.length;
428     }
429 
430     /// ditto
431     this(shared Allocator allocator) @nogc nothrow pure @safe
432     in
433     {
434         assert(allocator !is null);
435     }
436     do
437     {
438         this.allocator_ = allocator;
439     }
440 
441     /**
442      * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
443      *
444      * Params:
445      *  C   = Type of the character to fill the string with.
446      *  n   = Number of characters to copy.
447      *  chr = Character to fill the string with.
448      *  allocator = Allocator.
449      */
450     this(C)(const size_t n,
451             const C chr,
452             shared Allocator allocator = defaultAllocator) @trusted
453     if (isSomeChar!C)
454     {
455         this(allocator);
456         if (n == 0)
457         {
458             return;
459         }
460         insertBack(chr);
461 
462         // insertBack should validate the character, so we can just copy it
463         // n - 1 times.
464         auto remaining = length * n;
465 
466         reserve(remaining);
467 
468         // Use a quick copy.
469         for (auto i = this.length_ * 2; i <= remaining; i *= 2)
470         {
471             this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
472             this.length_ = i;
473         }
474         remaining -= length;
475         copy(this.data[this.length_ - remaining .. this.length_],
476              this.data[this.length_ .. this.length_ + remaining]);
477         this.length_ += remaining;
478     }
479 
480     ///
481     @nogc pure @safe unittest
482     {
483         {
484             auto s = String(1, 'О');
485             assert(s.length == 2);
486         }
487         {
488             auto s = String(3, 'О');
489             assert(s.length == 6);
490         }
491         {
492             auto s = String(8, 'О');
493             assert(s.length == 16);
494         }
495     }
496 
497     this(this) @nogc nothrow pure @trusted
498     {
499         auto buf = this.data[0 .. this.length_];
500         this.length_ = capacity_ = 0;
501         this.data = null;
502         insertBack(buf);
503     }
504 
505     /**
506      * Destroys the string.
507      */
508     ~this() @nogc nothrow pure @trusted
509     {
510         allocator.resize(this.data[0 .. this.capacity_], 0);
511     }
512 
513     private void write4Bytes(ref const dchar src)
514     @nogc nothrow pure @trusted
515     in
516     {
517         assert(capacity - length >= 4);
518         assert(src - 0x10000 < 0x100000);
519     }
520     do
521     {
522         auto dst = this.data + length;
523 
524         *dst++ = 0xf0 | (src >> 18);
525         *dst++ = 0x80 | ((src >> 12) & 0x3f);
526         *dst++ = 0x80 | ((src >> 6) & 0x3f);
527         *dst = 0x80 | (src & 0x3f);
528 
529         this.length_ += 4;
530     }
531 
532     private size_t insertWideChar(C)(auto ref const C chr) @trusted
533     if (is(C == wchar) || is(C == dchar))
534     in
535     {
536         assert(capacity - length >= 3);
537     }
538     do
539     {
540         auto dst = this.data + length;
541         if (chr < 0x80)
542         {
543             *dst = chr & 0x7f;
544             this.length_ += 1;
545             return 1;
546         }
547         else if (chr < 0x800)
548         {
549             *dst++ = 0xc0 | (chr >> 6) & 0xff;
550             *dst = 0x80 | (chr & 0x3f);
551             this.length_ += 2;
552             return 2;
553         }
554         else if (chr < 0xd800 || (chr >= 0xe000 && chr <= 0xffff))
555         {
556             *dst++ = 0xe0 | (chr >> 12) & 0xff;
557             *dst++ = 0x80 | ((chr >> 6) & 0x3f);
558             *dst = 0x80 | (chr & 0x3f);
559             this.length_ += 3;
560             return 3;
561         }
562         return 0;
563     }
564 
565     /**
566      * Inserts a single character at the end of the string.
567      *
568      * Params:
569      *  chr = The character should be inserted.
570      *
571      * Returns: The number of bytes inserted (1).
572      */
573     size_t insertBack(char chr) @nogc nothrow pure @trusted
574     {
575         reserve(length + 1);
576 
577         *(data + length) = chr;
578         ++this.length_;
579 
580         return 1;
581     }
582 
583     /// ditto
584     size_t insertBack(const wchar chr) @nogc pure @trusted
585     {
586         reserve(length + 3);
587 
588         const ret = insertWideChar(chr);
589         if (ret == 0)
590         {
591             throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
592         }
593         return ret;
594     }
595 
596     /// ditto
597     size_t insertBack(const dchar chr) @nogc pure @trusted
598     {
599         reserve(length + dchar.sizeof);
600 
601         const ret = insertWideChar(chr);
602         if (ret > 0)
603         {
604             return ret;
605         }
606         else if (chr - 0x10000 < 0x100000)
607         {
608             write4Bytes(chr);
609             return 4;
610         }
611         else
612         {
613             throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
614         }
615     }
616 
617     /**
618      * Inserts a stringish range at the end of the string.
619      *
620      * Params:
621      *  R   = Type of the inserted string.
622      *  str = String should be inserted.
623      *
624      * Returns: The number of bytes inserted.
625      */
626     size_t insertBack(R)(R str) @trusted
627     if (!isInfinite!R
628      && isInputRange!R
629      && is(Unqual!(ElementType!R) == char))
630     {
631         size_t size;
632         static if (hasLength!R || isNarrowString!R)
633         {
634             size = str.length + length;
635             reserve(size);
636         }
637 
638         static if (isNarrowString!R)
639         {
640             str.copy(this.data[length .. size]);
641             this.length_ = size;
642             return str.length;
643         }
644         else static if (isInstanceOf!(ByCodeUnit, R))
645         {
646             str.get.copy(this.data[length .. size]);
647             this.length_ = size;
648             return str.length;
649         }
650         else
651         {
652             size_t insertedLength;
653             foreach (c; str)
654             {
655                 insertedLength += insertBack(c);
656             }
657             return insertedLength;
658         }
659     }
660 
661     /// ditto
662     size_t insertBack(R)(R str) @trusted
663     if (!isInfinite!R
664      && isInputRange!R
665      && is(Unqual!(ElementType!R) == wchar))
666     {
667         static if (hasLength!R || isNarrowString!R)
668         {
669             reserve(length + str.length * wchar.sizeof);
670         }
671 
672         static if (isNarrowString!R)
673         {
674             const(wchar)[] range = str;
675         }
676         else
677         {
678             alias range = str;
679         }
680 
681         auto oldLength = length;
682 
683         while (!range.empty)
684         {
685             reserve(length + wchar.sizeof * 2);
686 
687             auto ret = insertWideChar(range.front);
688             if (ret > 0)
689             {
690                 range.popFront();
691             }
692             else if (range.front - 0xd800 < 2048)
693             { // Surrogate pair.
694                 static if (isNarrowString!R)
695                 {
696                     if (range.length < 2 || range[1] - 0xdc00 >= 0x400)
697                     {
698                         throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
699                     }
700                     dchar d = (range[0] - 0xd800) | ((range[1] - 0xdc00) >> 10);
701 
702                     popFrontN(range, 2);
703                 }
704                 else
705                 {
706                     dchar d = range.front - 0xd800;
707                     range.popFront();
708 
709                     if (range.empty || range.front - 0xdc00 >= 0x400)
710                     {
711                         throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
712                     }
713                     d |= (range.front - 0xdc00) >> 10;
714 
715                     range.popFront();
716                 }
717                 write4Bytes(d);
718             }
719             else
720             {
721                 throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
722             }
723         }
724         return this.length_ - oldLength;
725     }
726 
727     /// ditto
728     size_t insertBack(R)(R str) @trusted
729     if (!isInfinite!R
730      && isInputRange!R
731      && is(Unqual!(ElementType!R) == dchar))
732     {
733         static if (hasLength!R || isSomeString!R)
734         {
735             reserve(length + str.length * 4);
736         }
737 
738         size_t insertedLength;
739         foreach (const dchar c; str)
740         {
741             insertedLength += insertBack(c);
742         }
743         return insertedLength;
744     }
745 
746     /// ditto
747     alias insert = insertBack;
748 
749     /**
750      * Reserves $(D_PARAM size) bytes for the string.
751      *
752      * If $(D_PARAM size) is less than or equal to the $(D_PSYMBOL capacity), the
753      * function call does not cause a reallocation and the string capacity is not
754      * affected.
755      *
756      * Params:
757      *  size = Desired size in bytes.
758      */
759     void reserve(const size_t size) @nogc nothrow pure @trusted
760     {
761         if (this.capacity_ >= size)
762         {
763             return;
764         }
765 
766         this.data = allocator.resize(this.data[0 .. this.capacity_], size).ptr;
767         this.capacity_ = size;
768     }
769 
770     ///
771     @nogc nothrow pure @safe unittest
772     {
773         String s;
774         assert(s.capacity == 0);
775 
776         s.reserve(3);
777         assert(s.capacity == 3);
778 
779         s.reserve(3);
780         assert(s.capacity == 3);
781 
782         s.reserve(1);
783         assert(s.capacity == 3);
784     }
785 
786     /**
787      * Requests the string to reduce its capacity to fit the $(D_PARAM size).
788      *
789      * The request is non-binding. The string won't become smaller than the
790      * string byte length.
791      *
792      * Params:
793      *  size = Desired size.
794      */
795     void shrink(const size_t size) @nogc nothrow pure @trusted
796     {
797         if (this.capacity_ <= size)
798         {
799             return;
800         }
801 
802         const n = max(this.length_, size);
803         void[] buf = this.data[0 .. this.capacity_];
804         if (allocator.reallocate(buf, n))
805         {
806             this.capacity_ = n;
807             this.data = cast(char*) buf;
808         }
809     }
810 
811     ///
812     @nogc nothrow pure @safe unittest
813     {
814         auto s = String("Die Alten lasen laut.");
815         assert(s.capacity == 21);
816 
817         s.reserve(30);
818         s.shrink(25);
819         assert(s.capacity == 25);
820 
821         s.shrink(18);
822         assert(s.capacity == 21);
823 
824         s.shrink(22);
825         assert(s.capacity == 21);
826     }
827 
828     /**
829      * Returns: String capacity in bytes.
830      */
831     @property size_t capacity() const @nogc nothrow pure @safe
832     {
833         return this.capacity_;
834     }
835 
836     ///
837     @nogc nothrow pure @safe unittest
838     {
839         auto s = String("In allem Schreiben ist Schamlosigkeit.");
840         assert(s.capacity == 38);
841     }
842 
843     /**
844      * Slicing assignment.
845      *
846      * Params:
847      *  R     = $(D_KEYWORD char).
848      *  value = Assigned character, range or string.
849      *  i     = Slice start.
850      *  j     = Slice end.
851      *
852      * Returns: Slice with the assigned part of the string.
853      *
854      * Precondition: $(D_INLINECODE i <= j && j <= length
855      *                           && value.length == j - i)
856      */
857     ByCodeUnit!char opSliceAssign(R)(ByCodeUnit!R value,
858                                      const size_t i,
859                                      const size_t j)
860     if (is(Unqual!R == char))
861     in
862     {
863         assert(i <= j);
864         assert(j <= length);
865         assert(j - i == value.length);
866     }
867     do
868     {
869         auto target = opSlice(i, j);
870         copy(value, target);
871         return target;
872     }
873 
874     /// ditto
875     ByCodeUnit!char opSliceAssign(const char[] value,
876                                   const size_t i,
877                                   const size_t j)
878     @nogc nothrow pure @trusted
879     in
880     {
881         assert(i <= j);
882         assert(j <= length);
883     }
884     do
885     {
886         copy(value[], this.data[i .. j]);
887         return opSlice(i, j);
888     }
889 
890     /// ditto
891     ByCodeUnit!char opSliceAssign(const char value,
892                                   const size_t i,
893                                   const size_t j)
894     @nogc nothrow pure @trusted
895     in
896     {
897         assert(i <= j);
898         assert(j <= length);
899     }
900     do
901     {
902         for (auto p = this.data + i; p < this.data + j; ++p)
903         {
904             *p = value;
905         }
906         return opSlice(i, j);
907     }
908 
909     /**
910      * Returns an array used internally by the string.
911      * The length of the returned array may be smaller than the size of the
912      * reserved memory for the string.
913      *
914      * Returns: The array representing the string.
915      */
916     inout(char)[] get() inout @nogc nothrow pure @trusted
917     {
918         return this.data[0 .. this.length_];
919     }
920 
921     ///
922     @nogc nothrow pure @safe unittest
923     {
924         auto s = String("Char array.");
925         assert(s.get().length == 11);
926     }
927 
928     /**
929      * Returns null-terminated string. The returned string is managed by this
930      * object and shouldn't be freed.
931      *
932      * Returns: Null-terminated string.
933      */
934     const(char)* toStringz() @nogc nothrow pure @system
935     {
936         reserve(length + 1);
937         this.data[length] = '\0';
938         return this.data;
939     }
940 
941     ///
942     @nogc nothrow pure @system unittest
943     {
944         auto s = String("C string.");
945         assert(s.toStringz()[0] == 'C');
946         assert(s.toStringz()[9] == '\0');
947     }
948 
949     /**
950      * Returns: The number of code units that are required to encode the string.
951      */
952     @property size_t length() const @nogc nothrow pure @safe
953     {
954         return this.length_;
955     }
956 
957     ///
958     alias opDollar = length;
959 
960     ///
961     @nogc nothrow pure @safe unittest
962     {
963         auto s = String("Piscis primuin a capite foetat.");
964         assert(s.length == 31);
965         assert(s[$ - 1] == '.');
966     }
967 
968     /**
969      * Params:
970      *  pos = Position.
971      *
972      * Returns: Byte at $(D_PARAM pos).
973      *
974      * Precondition: $(D_INLINECODE length > pos).
975      */
976     ref inout(char) opIndex(const size_t pos) inout @nogc nothrow pure @trusted
977     in
978     {
979         assert(length > pos);
980     }
981     do
982     {
983         return *(this.data + pos);
984     }
985 
986     ///
987     @nogc nothrow pure @safe unittest
988     {
989         auto s = String("Alea iacta est.");
990         assert(s[0] == 'A');
991         assert(s[4] == ' ');
992     }
993 
994     /**
995      * Returns: Random access range that iterates over the string by bytes, in
996      *          forward order.
997      */
998     ByCodeUnit!char opIndex() @nogc nothrow pure @trusted
999     {
1000         return typeof(return)(this, this.data, this.data + length);
1001     }
1002 
1003     /// ditto
1004     ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc
1005     {
1006         return typeof(return)(this, this.data, this.data + length);
1007     }
1008 
1009     ///
1010     @nogc nothrow pure @safe unittest
1011     {
1012         auto s = String("Plutarchus");
1013         auto r = s[];
1014         assert(r.front == 'P');
1015         assert(r.back == 's');
1016 
1017         r.popFront();
1018         assert(r.front == 'l');
1019         assert(r.back == 's');
1020 
1021         r.popBack();
1022         assert(r.front == 'l');
1023         assert(r.back == 'u');
1024 
1025         assert(r.length == 8);
1026     }
1027 
1028     ///
1029     @nogc nothrow pure @safe unittest
1030     {
1031         auto s = const String("Was ich vermag, soll gern geschehen. Goethe");
1032         auto r1 = s[];
1033         assert(r1.front == 'W');
1034 
1035         auto r2 = r1[];
1036         r1.popFront();
1037         assert(r1.front == 'a');
1038         assert(r2.front == 'W');
1039     }
1040 
1041     /**
1042      * Returns: Forward range that iterates over the string by code points.
1043      */
1044     ByCodePoint!char byCodePoint() @nogc nothrow pure @trusted
1045     {
1046         return typeof(return)(this, this.data, this.data + length);
1047     }
1048 
1049     /// ditto
1050     ByCodePoint!(const char) byCodePoint() const @nogc nothrow pure @trusted
1051     {
1052         return typeof(return)(this, this.data, this.data + length);
1053     }
1054 
1055     ///
1056     @nogc pure @safe unittest
1057     {
1058         auto s = String("Мне есть, что спеть, представ перед Всевышним.");
1059         auto cp = s.byCodePoint();
1060         assert(cp.front == 'М');
1061         cp.popFront();
1062         assert(cp.front == 'н');
1063 
1064         s = String("€");
1065         cp = s.byCodePoint();
1066         assert(cp.front == '€');
1067         assert(s.length == 3);
1068 
1069         s = String("\U00024B62");
1070         cp = s.byCodePoint();
1071         assert(cp.front == '\U00024B62');
1072         assert(s.length == 4);
1073     }
1074 
1075     ///
1076     @nogc pure @safe unittest
1077     {
1078         auto s = const String("Высоцкий");
1079         auto cp1 = s.byCodePoint();
1080         assert(cp1.front == 'В');
1081 
1082         auto cp2 = cp1[];
1083         cp1.popFront();
1084         assert(cp1.front == 'ы');
1085         assert(cp2.front == 'В');
1086 
1087         cp2 = cp1.save();
1088         cp1.popFront();
1089         assert(cp1.front == 'с');
1090         assert(cp2.front == 'ы');
1091     }
1092 
1093     /**
1094      * Returns whether the string is empty.
1095      *
1096      * Returns: $(D_KEYWORD true) if the string is empty, $(D_KEYWORD false)
1097      *          otherwise.
1098      */
1099     @property bool empty() const @nogc nothrow pure @safe
1100     {
1101         return length == 0;
1102     }
1103 
1104     ///
1105     @nogc nothrow pure @safe unittest
1106     {
1107         String s;
1108         assert(s.empty);
1109 
1110         s.insertBack('K');
1111         assert(!s.empty);
1112     }
1113 
1114     /**
1115      * Params:
1116      *  i = Slice start.
1117      *  j = Slice end.
1118      *
1119      * Returns: A range that iterates over the string by bytes from
1120      *          index $(D_PARAM i) up to (excluding) index $(D_PARAM j).
1121      *
1122      * Precondition: $(D_INLINECODE i <= j && j <= length).
1123      */
1124     ByCodeUnit!char opSlice(const size_t i, const size_t j)
1125     @nogc nothrow pure @trusted
1126     in
1127     {
1128         assert(i <= j);
1129         assert(j <= length);
1130     }
1131     do
1132     {
1133         return typeof(return)(this, this.data + i, this.data + j);
1134     }
1135 
1136     /// ditto
1137     ByCodeUnit!(const char) opSlice(const size_t i, const size_t j)
1138     const @nogc nothrow pure @trusted
1139     in
1140     {
1141         assert(i <= j);
1142         assert(j <= length);
1143     }
1144     do
1145     {
1146         return typeof(return)(this, this.data + i, this.data + j);
1147     }
1148 
1149     ///
1150     @nogc nothrow pure @safe unittest
1151     {
1152         auto s = String("Vladimir Soloviev");
1153         auto r = s[9 .. $];
1154 
1155         assert(r.front == 'S');
1156         assert(r.back == 'v');
1157 
1158         r.popFront();
1159         r.popBack();
1160         assert(r.front == 'o');
1161         assert(r.back == 'e');
1162 
1163         r.popFront();
1164         r.popBack();
1165         assert(r.front == 'l');
1166         assert(r.back == 'i');
1167 
1168         r.popFront();
1169         r.popBack();
1170         assert(r.front == 'o');
1171         assert(r.back == 'v');
1172 
1173         r.popFront();
1174         r.popBack();
1175         assert(r.empty);
1176     }
1177 
1178     /**
1179      * Assigns another string.
1180      *
1181      * If $(D_PARAM that) is passed by value, it won't be copied, but moved.
1182      * This string will take the ownership over $(D_PARAM that)'s storage and
1183      * the allocator.
1184      *
1185      * If $(D_PARAM that) is passed by reference, it will be copied.
1186      *
1187      * Params:
1188      *  S    = Content type.
1189      *  that = The value should be assigned.
1190      *
1191      * Returns: $(D_KEYWORD this).
1192      */
1193     ref String opAssign(S)(S that)
1194     if (is(S == String))
1195     {
1196         swap(this.data, that.data);
1197         swap(this.length_, that.length_);
1198         swap(this.capacity_, that.capacity_);
1199         swap(this.allocator_, that.allocator_);
1200         return this;
1201     }
1202 
1203     /// ditto
1204     ref String opAssign(S)(ref S that) @trusted
1205     if (is(Unqual!S == String))
1206     {
1207         reserve(that.length);
1208         that.data[0 .. that.length].copy(this.data[0 .. that.length]);
1209         this.length_ = that.length;
1210         return this;
1211     }
1212 
1213     ///
1214     @nogc nothrow pure @safe unittest
1215     {
1216         auto s = String("Черная, потом пропахшая выть!");
1217         s = String("Как мне тебя не ласкать, не любить?");
1218     }
1219 
1220     /**
1221      * Assigns a stringish range.
1222      *
1223      * Params:
1224      *  S    = String type.
1225      *  that = Initial string.
1226      *
1227      * Returns: $(D_KEYWORD this).
1228      *
1229      * Throws: $(D_PSYMBOL UTFException).
1230      */
1231     ref String opAssign(S)(S that)
1232     if (!isInfinite!S
1233      && isInputRange!S
1234      && isSomeChar!(ElementType!S))
1235     {
1236         this.length_ = 0;
1237         insertBack(that);
1238         return this;
1239     }
1240 
1241     ///
1242     @nogc nothrow pure @safe unittest
1243     {
1244         auto s = String("Оловом светится лужная голь...");
1245         s = "Грустная песня, ты - русская боль.";
1246         assert(s == "Грустная песня, ты - русская боль.");
1247     }
1248 
1249     /**
1250      * Comparison for equality.
1251      *
1252      * Params:
1253      *  S    = Right hand side type.
1254      *  that = The string to compare with.
1255      *
1256      * Returns: A positive number if $(D_KEYWORD this) is lexicographically
1257      *          greater than $(D_PARAM that), if equal `0`, else `-1`.
1258      */
1259     int opCmp(S)(auto ref S that) const @trusted
1260     if (is(Unqual!S == String))
1261     {
1262         return cmp(this.data[0 .. length], that.data[0 .. that.length]);
1263     }
1264 
1265     /// ditto
1266     int opCmp(S)(ByCodeUnit!S that) const @trusted
1267     if (is(Unqual!S == char))
1268     {
1269         return cmp(this.data[0 .. length],
1270             that.begin[0 .. that.end - that.begin]);
1271     }
1272 
1273     /// ditto
1274     int opCmp(S)(ByCodePoint!S that) const @trusted
1275     if (is(Unqual!S == char))
1276     {
1277         return cmp(this.data[0 .. length],
1278             that.begin[0 .. that.end - that.begin]);
1279     }
1280 
1281     /// ditto
1282     int opCmp()(const char[] that) const @trusted
1283     {
1284         return cmp(this.data[0 .. length], that);
1285     }
1286 
1287     ///
1288     @nogc nothrow pure @safe unittest
1289     {
1290         assert(String("Голубая кофта.") < String("Синие глаза."));
1291         assert(String("Никакой я правды") < String("милой не сказал")[]);
1292     }
1293 
1294     /**
1295      * Comparison for equality.
1296      *
1297      * Params:
1298      *  S    = Right hand side type.
1299      *  that = The string to compare with.
1300      *
1301      * Returns: $(D_KEYWORD true) if the strings are equal, $(D_KEYWORD false)
1302      *          otherwise.
1303      */
1304     bool opEquals(S)(auto ref S that) const @trusted
1305     if (is(Unqual!S == String))
1306     {
1307         return equal(this.data[0 .. length], that.data[0 .. that.length]);
1308     }
1309 
1310     /**
1311      * Comparison for equality.
1312      *
1313      * Params:
1314      *  that = Right hand side string range.
1315      *
1316      * Returns: $(D_KEYWORD true) if the string and the range are equal,
1317      *          $(D_KEYWORD false) otherwise.
1318      */
1319     bool opEquals(S)(ByCodeUnit!S that) const @trusted
1320     if (is(Unqual!S == char))
1321     {
1322         return equal(this.data[0 .. length],
1323                      that.begin[0 .. that.end - that.begin]);
1324     }
1325 
1326     /// ditto
1327     bool opEquals(S)(ByCodePoint!S that) const @trusted
1328     if (is(Unqual!S == char))
1329     {
1330         return equal(this.data[0 .. length],
1331                      that.begin[0 .. that.end - that.begin]);
1332     }
1333 
1334     /// ditto
1335     bool opEquals()(const char[] that) const @trusted
1336     {
1337         return equal(this.data[0 .. length], that);
1338     }
1339 
1340     ///
1341     @nogc nothrow pure @safe unittest
1342     {
1343         assert(String("Милая спросила:") != String("Крутит ли метель?"));
1344         assert(String("Затопить бы печку,") != String("постелить постель.")[]);
1345         assert(const String("Я ответил милой:") != String("Нынче с высоты"));
1346         assert(String("Кто-то осыпает") != "белые цветы");
1347         assert(const String("Затопи ты печку,") != String("постели постель,")[]);
1348 
1349         auto s = const String("У меня на сердце");
1350         assert(s[] != String("без тебя метель."));
1351         assert(s == s);
1352         assert(s == s[]);
1353         assert(s == "У меня на сердце");
1354     }
1355 
1356     /**
1357      * Assigns a value to the character with the index $(D_PARAM pos).
1358      *
1359      * Params:
1360      *  value = Value.
1361      *  pos   = Position.
1362      *
1363      * Returns: Assigned value.
1364      *
1365      * Precondition: $(D_INLINECODE length > pos).
1366      */
1367     ref char opIndexAssign(const char value, const size_t pos)
1368     @nogc nothrow pure @safe
1369     {
1370         return opIndex(pos) = value;
1371     }
1372 
1373     ///
1374     @nogc nothrow pure @safe unittest
1375     {
1376         auto s = String("alea iacta est.");
1377 
1378         s[0] = 'A';
1379         assert(s[0] == 'A');
1380     }
1381 
1382     /**
1383      * Slicing assignment.
1384      *
1385      * Params:
1386      *  R     = $(D_KEYWORD char).
1387      *  value = Assigned character, range or string.
1388      *
1389      * Returns: Range over the string.
1390      *
1391      * Precondition: $(D_INLINECODE length == value.length).
1392      */
1393     ByCodeUnit!char opIndexAssign(R)(ByCodeUnit!R value)
1394     if (is(Unqual!R == char))
1395     {
1396         return opSliceAssign(value, 0, length);
1397     }
1398 
1399     /// ditto
1400     ByCodeUnit!char opIndexAssign(const char value) @nogc nothrow pure @safe
1401     {
1402         return opSliceAssign(value, 0, length);
1403     }
1404 
1405     /// ditto
1406     ByCodeUnit!char opIndexAssign(const char[] value) @nogc nothrow pure @safe
1407     {
1408         return opSliceAssign(value, 0, length);
1409     }
1410 
1411     /**
1412      * Remove all characters beloning to $(D_PARAM r).
1413      *
1414      * Params:
1415      *  R = $(D_PSYMBOL ByCodeUnit) or $(D_PSYMBOL ByCodePoint).
1416      *  r = Range originally obtained from this string.
1417      *
1418      * Returns: A range spanning the remaining characters in the string that
1419      *          initially were right after $(D_PARAM r).
1420      *
1421      * Precondition: $(D_PARAM r) refers to a region of $(D_KEYWORD this).
1422      */
1423     R remove(R)(R r) @trusted
1424     if (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char))
1425     in
1426     {
1427         assert(r.container is &this);
1428         assert(r.begin >= this.data);
1429         assert(r.end <= this.data + length);
1430     }
1431     do
1432     {
1433         auto end = this.data + this.length;
1434         copy(ByCodeUnit!char(this, r.end, end), ByCodeUnit!char(this, r.begin, end));
1435         this.length_ = length - (r.end - r.begin);
1436         return R(this, r.begin, this.data + length);
1437     }
1438 
1439     ///
1440     @nogc pure @safe unittest
1441     {
1442         import std.algorithm.searching : count;
1443 
1444         auto s = String("Из пословицы слова не выкинешь.");
1445 
1446         assert(s.remove(s[5 .. 24]).length == 33);
1447         assert(s == "Из слова не выкинешь.");
1448         assert(s.length == 38);
1449 
1450         auto byCodePoint = s.byCodePoint();
1451         popFrontN(byCodePoint, 8);
1452 
1453         assert(s.remove(byCodePoint).count == 0);
1454         assert(s == "Из слова");
1455 
1456         assert(s.remove(s[]).length == 0);
1457         assert(s.length == 0);
1458 
1459         assert(s.remove(s[]).length == 0);
1460     }
1461 
1462     /**
1463      * Inserts $(D_PARAM el) before or after $(D_PARAM r).
1464      *
1465      * Params:
1466      *  R = $(D_PSYMBOL ByCodeUnit) or $(D_PSYMBOL ByCodePoint).
1467      *  T  = Stringish type.
1468      *  r  = Range originally obtained from this string.
1469      *  el = Value(s) should be inserted.
1470      *
1471      * Returns: The number of elements inserted.
1472      *
1473      * Precondition: $(D_PARAM r) refers to a region of $(D_KEYWORD this).
1474      */
1475     size_t insertAfter(T, R)(R r, T el) @trusted
1476     if ((isSomeChar!T || (!isInfinite!T
1477         && isInputRange!T
1478         && isSomeChar!(ElementType!T)))
1479         && (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char)))
1480     in
1481     {
1482         assert(r.container is &this);
1483         assert(r.begin >= this.data);
1484         assert(r.end <= this.data + length);
1485     }
1486     do
1487     {
1488         const oldLength = length;
1489         const after = r.end - this.data;
1490         const inserted = insertBack(el);
1491 
1492         bringToFront(this.data[after .. oldLength], this.data[oldLength .. length]);
1493         return inserted;
1494     }
1495 
1496     ///
1497     @nogc nothrow pure @safe unittest
1498     {
1499         auto s = String("Казнить нельзя помиловать.");
1500         s.insertAfter(s[0 .. 27], ",");
1501         assert(s == "Казнить нельзя, помиловать.");
1502 
1503         s = String("Казнить нельзя помиловать.");
1504         s.insertAfter(s[0 .. 14], ',');
1505         assert(s == "Казнить, нельзя помиловать.");
1506     }
1507 
1508     ///
1509     size_t insertBefore(T, R)(R r, T el) @trusted
1510     if ((isSomeChar!T || (!isInfinite!T
1511         && isInputRange!T
1512         && isSomeChar!(ElementType!T)))
1513         && (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char)))
1514     in
1515     {
1516         assert(r.container is &this);
1517         assert(r.begin >= this.data);
1518         assert(r.end <= this.data + length);
1519     }
1520     do
1521     {
1522         return insertAfter(R(this, this.data, r.begin), el);
1523     }
1524 
1525     ///
1526     @nogc nothrow pure @safe unittest
1527     {
1528         auto s = String("Казнить нельзя помиловать.");
1529         s.insertBefore(s[27 .. $], ",");
1530         assert(s == "Казнить нельзя, помиловать.");
1531 
1532         s = String("Казнить нельзя помиловать.");
1533         s.insertBefore(s[14 .. $], ',');
1534         assert(s == "Казнить, нельзя помиловать.");
1535     }
1536 
1537     /**
1538      * Calculates the hash value for the string.
1539      *
1540      * Returns: Hash value for the string.
1541      */
1542     size_t toHash() const @nogc nothrow pure @safe
1543     {
1544         return hash(get);
1545     }
1546 
1547     mixin DefaultAllocator;
1548 }