/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * UTF-8 encoded string.
 *
 * You can create a $(D_PSYMBOL String) from a literal string, single character
 * or character range. Characters can be of the type $(D_KEYWORD char),
 * $(D_KEYWORD wchar) or $(D_KEYWORD dchar). Literal strings, characters and
 * character ranges can be also inserted into an existing string.
 *
 * $(D_PSYMBOL String) is always valid UTF-8. Inserting an invalid sequence
 * or working on a corrupted $(D_PSYMBOL String) causes
 * $(D_PSYMBOL UTFException) to be thrown.
 *
 * Internally $(D_PSYMBOL String) is represented by a sequence of
 * $(D_KEYWORD char)s.
 *
 * Copyright: Eugene Wissner 2017-2020.
 * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
 *                  Mozilla Public License, v. 2.0).
 * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
 * Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/container/string.d,
 *                 tanya/container/string.d)
 */
module tanya.container.string;

import std.algorithm.comparison;
import std.algorithm.mutation : bringToFront;
import tanya.algorithm.mutation;
import tanya.hash.lookup;
import tanya.memory.allocator;
import tanya.memory.lifetime;
import tanya.meta.trait;
import tanya.meta.transform;
import tanya.range.array;
import tanya.range.primitive;

/**
 * Thrown on encoding errors.
 */
class UTFException : Exception
{
    /**
     * Params:
     *  msg  = The message for the exception.
     *  file = The file where the exception occurred.
     *  line = The line number where the exception occurred.
     *  next = The previous exception in the chain of exceptions, if any.
     */
    this(string msg,
         string file = __FILE__,
         size_t line = __LINE__,
         Throwable next = null) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }
}

/**
 * Iterates $(D_PSYMBOL String) by UTF-8 code unit.
 *
 * Params:
 *  E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
 */
struct ByCodeUnit(E)
if (is(Unqual!E == char))
{
    // [begin, end) is the viewed part of the container's buffer.
    private E* begin, end;
    private alias ContainerType = CopyConstness!(E, String);
    private ContainerType* container;

    invariant
    {
        assert(this.begin <= this.end);
        assert(this.container !is null);
        assert(this.begin >= this.container.data);
        assert(this.end <= this.container.data + this.container.length);
    }

    // Creates a range over [begin, end), which has to lie inside container.
    private this(ref ContainerType container, E* begin, E* end) @trusted
    in
    {
        assert(begin <= end);
        assert(begin >= container.data);
        assert(end <= container.data + container.length);
    }
    do
    {
        this.container = &container;
        this.begin = begin;
        this.end = end;
    }

    @disable this();

    // Returns a copy of this range.
    @property ByCodeUnit save()
    {
        return this;
    }

    // true if there are no code units left.
    @property bool empty() const
    {
        return this.begin == this.end;
    }

    // Number of code units left in the range.
    @property size_t length() const
    {
        return this.end - this.begin;
    }

    alias opDollar = length;

    // First code unit. The range must not be empty.
    @property ref inout(E) front() inout
    in
    {
        assert(!empty);
    }
    do
    {
        return *this.begin;
    }

    // Last code unit. The range must not be empty.
    @property ref inout(E) back() inout @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        return *(this.end - 1);
    }

    // Drops the first code unit. The range must not be empty.
    void popFront() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        ++this.begin;
    }

    // Drops the last code unit. The range must not be empty.
    void popBack() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        --this.end;
    }

    // Code unit at the offset i, counted from the current front.
    ref inout(E) opIndex(const size_t i) inout @trusted
    in
    {
        assert(i < length);
    }
    do
    {
        return *(this.begin + i);
    }

    // Range over the same code units as this one.
    ByCodeUnit opIndex()
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }

    ByCodeUnit!(const E) opIndex() const
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }

    // Subrange [i, j), the offsets are relative to the current front.
    ByCodeUnit opSlice(const size_t i, const size_t j) @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        return typeof(return)(*this.container, this.begin + i, this.begin + j);
    }

    ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        return typeof(return)(*this.container, this.begin + i, this.begin + j);
    }

    // The viewed code units as a built-in slice.
    inout(E)[] get() inout @trusted
    {
        return this.begin[0 .. length];
    }
}

/**
 * Iterates $(D_PSYMBOL String) by UTF-8 code point.
 *
 * Params:
 *  E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
 */
struct ByCodePoint(E)
if (is(Unqual!E == char))
{
    // [begin, end) is the viewed part of the container's buffer.
    private E* begin, end;
    private alias ContainerType = CopyConstness!(E, String);
    private ContainerType* container;

    invariant
    {
        assert(this.begin <= this.end);
        assert(this.container !is null);
        assert(this.begin >= this.container.data);
        assert(this.end <= this.container.data + this.container.length);
    }

    // Creates a range over [begin, end), which has to lie inside container.
    private this(ref ContainerType container, E* begin, E* end) @trusted
    in
    {
        assert(begin <= end);
        assert(begin >= container.data);
        assert(end <= container.data + container.length);
    }
    do
    {
        this.container = &container;
        this.begin = begin;
        this.end = end;
    }

    @disable this();

    // Returns a copy of this range.
    @property ByCodePoint save()
    {
        return this;
    }

    // true if there are no code units left.
    @property bool empty() const
    {
        return this.begin == this.end;
    }

    // Decodes the code point at the front without advancing the range.
    // Throws UTFException if the sequence is longer than 4 code units or
    // doesn't fit into the range.
    @property dchar front() const @trusted
    in
    {
        assert(!empty);
    }
    out (chr)
    {
        assert(chr < 0xd800 || chr > 0xdfff);
    }
    do
    {
        dchar chr;
        ubyte units;
        int mask;
        const(char)* it = this.begin;

        if (*it & 0x80)
        {
            // Count the leading 1-bits of the first code unit and build the
            // mask covering them; the remaining bits are payload.
            mask = 0xe0;
            for (units = 2; ((*it << units) & 0x80) != 0; ++units)
            {
                mask = (mask >> 1) | 0x80;
            }
        }
        if (this.begin + units > end || units > 4)
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
        }
        chr = *it++ & ~mask;

        // Every following code unit contributes its 6 low bits.
        for (; units > 1; --units)
        {
            chr = (chr << 6) | (*it++ & 0x3f);
        }

        return chr;
    }

    // Skips the code point at the front. Throws UTFException if the first
    // code unit isn't a valid leading unit or the sequence is truncated.
    void popFront() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        ubyte units;
        if ((*begin & 0xf0) == 0xf0)
        {
            units = 4;
        }
        else if ((*begin & 0xe0) == 0xe0)
        {
            units = 3;
        }
        else if ((*begin & 0xc0) == 0xc0)
        {
            units = 2;
        }
        else if ((*begin & 0x80) == 0)
        {
            units = 1;
        }
        if (units == 0 || this.begin + units > this.end)
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
        }
        this.begin += units;
    }

    // Range over the same code points as this one.
    ByCodePoint opIndex()
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }

    ByCodePoint!(const E) opIndex() const
    {
        return typeof(return)(*this.container, this.begin, this.end);
    }
}

/**
 * UTF-8 string.
 */
struct String
{
    private size_t length_;
    private char* data;
    private size_t capacity_;

    @nogc nothrow pure @safe invariant
    {
        assert(this.length_ <= this.capacity_);
    }

    /**
     * Constructs the string from a stringish range.
     *
     * Params:
     *  S         = String type.
     *  str       = Initial string.
     *  allocator = Allocator.
     *
     * Throws: $(D_PSYMBOL UTFException).
     *
     * Precondition: $(D_INLINECODE allocator !is null).
     */
    this(S)(const S str, shared Allocator allocator = defaultAllocator)
    if (!isInfinite!S
     && isInputRange!S
     && isSomeChar!(ElementType!S))
    {
        this(allocator);
        insertBack(str);
    }

    ///
    @nogc pure @safe unittest
    {
        auto s = String("\u10437"w);
        assert(s == "\u10437");
    }

    ///
    @nogc pure @safe unittest
    {
        auto s = String("Отказаться от вина - в этом страшная вина."d);
        assert(s == "Отказаться от вина - в этом страшная вина.");
    }

    // A UTF-16 surrogate pair is decoded into a single 4-byte sequence.
    @nogc pure @safe unittest
    {
        auto s = String("\U00010437"w);
        assert(s == "\U00010437");
        assert(s.length == 4);
    }

    /**
     * Initializes this string from another one.
     *
     * If $(D_PARAM init) is passed by value, it won't be copied, but moved.
     * If the allocator of $(D_PARAM init) matches $(D_PARAM allocator),
     * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s
     * storage, otherwise, the storage will be allocated with
     * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end.
     *
     * If $(D_PARAM init) is passed by reference, it will be copied.
     *
     * Params:
     *  S         = Source string type.
     *  init      = Source string.
     *  allocator = Allocator.
     *
     * Precondition: $(D_INLINECODE allocator !is null).
     */
    this(S)(S init, shared Allocator allocator = defaultAllocator) @trusted
    if (is(S == String))
    {
        this(allocator);
        // The storage can only be taken over if it was allocated by the same
        // allocator that is going to release it.
        if (allocator is init.allocator)
        {
            // Just steal all references and the allocator.
            this.data = init.data;
            this.length_ = init.length_;
            this.capacity_ = init.capacity_;

            // Reset the source string, so it can't destroy the moved storage.
            init.length_ = init.capacity_ = 0;
            init.data = null;
        }
        else
        {
            reserve(init.length);
            init.data[0 .. init.length].copy(this.data[0 .. init.length]);
            this.length_ = init.length;
        }
    }

    /// ditto
    this(S)(ref S init, shared Allocator allocator = defaultAllocator) @trusted
    if (is(Unqual!S == String))
    {
        this(allocator);
        reserve(init.length);
        init.data[0 .. init.length].copy(this.data[0 .. init.length]);
        this.length_ = init.length;
    }

    /// ditto
    this(shared Allocator allocator) @nogc nothrow pure @safe
    in
    {
        assert(allocator !is null);
    }
    do
    {
        this.allocator_ = allocator;
    }

    /**
     * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
     *
     * Params:
     *  C         = Type of the character to fill the string with.
     *  n         = Number of characters to copy.
     *  chr       = Character to fill the string with.
     *  allocator = Allocator.
     */
    this(C)(const size_t n,
            const C chr,
            shared Allocator allocator = defaultAllocator) @trusted
    if (isSomeChar!C)
    {
        this(allocator);
        if (n == 0)
        {
            return;
        }
        insertBack(chr);

        // insertBack should validate the character, so we can just copy it
        // n - 1 times.
        auto remaining = length * n;

        reserve(remaining);

        // Use a quick copy: double the filled part as long as it fits.
        for (auto i = this.length_ * 2; i <= remaining; i *= 2)
        {
            this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
            this.length_ = i;
        }
        // Fill the rest with the tail of the already written part.
        remaining -= length;
        copy(this.data[this.length_ - remaining .. this.length_],
             this.data[this.length_ .. this.length_ + remaining]);
        this.length_ += remaining;
    }

    ///
    @nogc pure @safe unittest
    {
        {
            auto s = String(1, 'О');
            assert(s.length == 2);
        }
        {
            auto s = String(3, 'О');
            assert(s.length == 6);
        }
        {
            auto s = String(8, 'О');
            assert(s.length == 16);
        }
    }

    // Postblit: detaches from the source buffer and appends its content to
    // the now empty string, so the copy gets its own storage.
    this(this) @nogc nothrow pure @trusted
    {
        auto buf = this.data[0 .. this.length_];
        this.length_ = capacity_ = 0;
        this.data = null;
        insertBack(buf);
    }

    /**
     * Destroys the string.
     */
    ~this() @nogc nothrow pure @trusted
    {
        allocator.resize(this.data[0 .. this.capacity_], 0);
    }

    // Appends src encoded as a 4-byte UTF-8 sequence. At least 4 bytes have
    // to be reserved and src has to be in the range [0x10000, 0x10ffff].
    private void write4Bytes(ref const dchar src)
    @nogc nothrow pure @trusted
    in
    {
        assert(capacity - length >= 4);
        assert(src - 0x10000 < 0x100000);
    }
    do
    {
        auto dst = this.data + length;

        *dst++ = 0xf0 | (src >> 18);
        *dst++ = 0x80 | ((src >> 12) & 0x3f);
        *dst++ = 0x80 | ((src >> 6) & 0x3f);
        *dst = 0x80 | (src & 0x3f);

        this.length_ += 4;
    }

    // Appends chr if it can be encoded with 1 to 3 bytes and returns the
    // number of bytes written. Returns 0 and writes nothing for surrogates
    // and for code points above 0xffff. At least 3 bytes have to be reserved.
    private size_t insertWideChar(C)(auto ref const C chr) @trusted
    if (is(C == wchar) || is(C == dchar))
    in
    {
        assert(capacity - length >= 3);
    }
    do
    {
        auto dst = this.data + length;
        if (chr < 0x80)
        {
            *dst = chr & 0x7f;
            this.length_ += 1;
            return 1;
        }
        else if (chr < 0x800)
        {
            *dst++ = 0xc0 | (chr >> 6) & 0xff;
            *dst = 0x80 | (chr & 0x3f);
            this.length_ += 2;
            return 2;
        }
        else if (chr < 0xd800 || (chr >= 0xe000 && chr <= 0xffff))
        {
            *dst++ = 0xe0 | (chr >> 12) & 0xff;
            *dst++ = 0x80 | ((chr >> 6) & 0x3f);
            *dst = 0x80 | (chr & 0x3f);
            this.length_ += 3;
            return 3;
        }
        return 0;
    }

    /**
     * Inserts a single character at the end of the string.
     *
     * Params:
     *  chr = The character should be inserted.
     *
     * Returns: The number of bytes inserted (1).
     */
    size_t insertBack(char chr) @nogc nothrow pure @trusted
    {
        reserve(length + 1);

        *(data + length) = chr;
        ++this.length_;

        return 1;
    }

    /// ditto
    size_t insertBack(const wchar chr) @nogc pure @trusted
    {
        reserve(length + 3);

        const ret = insertWideChar(chr);
        if (ret == 0)
        {
            // A single wchar in the surrogate range can't be encoded.
            throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
        }
        return ret;
    }

    /// ditto
    size_t insertBack(const dchar chr) @nogc pure @trusted
    {
        reserve(length + dchar.sizeof);

        const ret = insertWideChar(chr);
        if (ret > 0)
        {
            return ret;
        }
        else if (chr - 0x10000 < 0x100000)
        {
            write4Bytes(chr);
            return 4;
        }
        else
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
        }
    }

    /**
     * Inserts a stringish range at the end of the string.
     *
     * Params:
     *  R   = Type of the inserted string.
     *  str = String should be inserted.
     *
     * Returns: The number of bytes inserted.
     */
    size_t insertBack(R)(R str) @trusted
    if (!isInfinite!R
     && isInputRange!R
     && is(Unqual!(ElementType!R) == char))
    {
        size_t size;
        static if (hasLength!R || isNarrowString!R)
        {
            size = str.length + length;
            reserve(size);
        }

        static if (isNarrowString!R)
        {
            str.copy(this.data[length .. size]);
            this.length_ = size;
            return str.length;
        }
        else static if (isInstanceOf!(ByCodeUnit, R))
        {
            str.get.copy(this.data[length .. size]);
            this.length_ = size;
            return str.length;
        }
        else
        {
            size_t insertedLength;
            foreach (c; str)
            {
                insertedLength += insertBack(c);
            }
            return insertedLength;
        }
    }

    /// ditto
    size_t insertBack(R)(R str) @trusted
    if (!isInfinite!R
     && isInputRange!R
     && is(Unqual!(ElementType!R) == wchar))
    {
        static if (hasLength!R || isNarrowString!R)
        {
            reserve(length + str.length * wchar.sizeof);
        }

        static if (isNarrowString!R)
        {
            const(wchar)[] range = str;
        }
        else
        {
            alias range = str;
        }

        auto oldLength = length;

        while (!range.empty)
        {
            // Room for the longest possible sequence (4 bytes).
            reserve(length + wchar.sizeof * 2);

            auto ret = insertWideChar(range.front);
            if (ret > 0)
            {
                range.popFront();
            }
            else if (range.front >= 0xd800 && range.front < 0xdc00)
            { // High surrogate, it has to be followed by a low surrogate.
                static if (isNarrowString!R)
                {
                    if (range.length < 2
                     || range[1] < 0xdc00
                     || range[1] > 0xdfff)
                    {
                        throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
                    }
                    // The high surrogate carries the upper 10 bits, the low
                    // one the lower 10 bits of the code point minus 0x10000.
                    dchar d = cast(dchar) (0x10000
                            + (((range[0] - 0xd800) << 10) | (range[1] - 0xdc00)));

                    popFrontN(range, 2);
                }
                else
                {
                    dchar d = cast(dchar) ((range.front - 0xd800) << 10);
                    range.popFront();

                    if (range.empty
                     || range.front < 0xdc00
                     || range.front > 0xdfff)
                    {
                        throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
                    }
                    d = cast(dchar) (0x10000 + (d | (range.front - 0xdc00)));

                    range.popFront();
                }
                write4Bytes(d);
            }
            else
            { // Low surrogate without a preceding high surrogate.
                throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
            }
        }
        return this.length_ - oldLength;
    }

    /// ditto
    size_t insertBack(R)(R str) @trusted
    if (!isInfinite!R
     && isInputRange!R
     && is(Unqual!(ElementType!R) == dchar))
    {
        static if (hasLength!R || isSomeString!R)
        {
            reserve(length + str.length * 4);
        }

        size_t insertedLength;
        foreach (const dchar c; str)
        {
            insertedLength += insertBack(c);
        }
        return insertedLength;
    }

    /// ditto
    alias insert = insertBack;

    /**
     * Reserves $(D_PARAM size) bytes for the string.
     *
     * If $(D_PARAM size) is less than or equal to the $(D_PSYMBOL capacity), the
     * function call does not cause a reallocation and the string capacity is not
     * affected.
     *
     * Params:
     *  size = Desired size in bytes.
     */
    void reserve(const size_t size) @nogc nothrow pure @trusted
    {
        if (this.capacity_ >= size)
        {
            return;
        }

        this.data = allocator.resize(this.data[0 .. this.capacity_], size).ptr;
        this.capacity_ = size;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        String s;
        assert(s.capacity == 0);

        s.reserve(3);
        assert(s.capacity == 3);

        s.reserve(3);
        assert(s.capacity == 3);

        s.reserve(1);
        assert(s.capacity == 3);
    }

    /**
     * Requests the string to reduce its capacity to fit the $(D_PARAM size).
     *
     * The request is non-binding. The string won't become smaller than the
     * string byte length.
     *
     * Params:
     *  size = Desired size.
     */
    void shrink(const size_t size) @nogc nothrow pure @trusted
    {
        if (this.capacity_ <= size)
        {
            return;
        }

        const n = max(this.length_, size);
        void[] buf = this.data[0 .. this.capacity_];
        if (allocator.reallocate(buf, n))
        {
            this.capacity_ = n;
            this.data = cast(char*) buf;
        }
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Die Alten lasen laut.");
        assert(s.capacity == 21);

        s.reserve(30);
        s.shrink(25);
        assert(s.capacity == 25);

        s.shrink(18);
        assert(s.capacity == 21);

        s.shrink(22);
        assert(s.capacity == 21);
    }

    /**
     * Returns: String capacity in bytes.
     */
    @property size_t capacity() const @nogc nothrow pure @safe
    {
        return this.capacity_;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("In allem Schreiben ist Schamlosigkeit.");
        assert(s.capacity == 38);
    }

    /**
     * Slicing assignment.
     *
     * Params:
     *  R     = $(D_KEYWORD char).
     *  value = Assigned character, range or string.
     *  i     = Slice start.
     *  j     = Slice end.
     *
     * Returns: Slice with the assigned part of the string.
     *
     * Precondition: $(D_INLINECODE i <= j && j <= length
     *                           && value.length == j - i)
     */
    ByCodeUnit!char opSliceAssign(R)(ByCodeUnit!R value,
                                     const size_t i,
                                     const size_t j)
    if (is(Unqual!R == char))
    in
    {
        assert(i <= j);
        assert(j <= length);
        assert(j - i == value.length);
    }
    do
    {
        auto target = opSlice(i, j);
        copy(value, target);
        return target;
    }

    /// ditto
    ByCodeUnit!char opSliceAssign(const char[] value,
                                  const size_t i,
                                  const size_t j)
    @nogc nothrow pure @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        copy(value[], this.data[i .. j]);
        return opSlice(i, j);
    }

    /// ditto
    ByCodeUnit!char opSliceAssign(const char value,
                                  const size_t i,
                                  const size_t j)
    @nogc nothrow pure @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        for (auto p = this.data + i; p < this.data + j; ++p)
        {
            *p = value;
        }
        return opSlice(i, j);
    }

    /**
     * Returns an array used internally by the string.
     * The length of the returned array may be smaller than the size of the
     * reserved memory for the string.
     *
     * Returns: The array representing the string.
     */
    inout(char)[] get() inout @nogc nothrow pure @trusted
    {
        return this.data[0 .. this.length_];
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Char array.");
        assert(s.get().length == 11);
    }

    /**
     * Returns null-terminated string. The returned string is managed by this
     * object and shouldn't be freed.
     *
     * Returns: Null-terminated string.
     */
    const(char)* toStringz() @nogc nothrow pure @system
    {
        // The terminator is written behind the content and doesn't count to
        // the string length.
        reserve(length + 1);
        this.data[length] = '\0';
        return this.data;
    }

    ///
    @nogc nothrow pure @system unittest
    {
        auto s = String("C string.");
        assert(s.toStringz()[0] == 'C');
        assert(s.toStringz()[9] == '\0');
    }

    /**
     * Returns: The number of code units that are required to encode the string.
     */
    @property size_t length() const @nogc nothrow pure @safe
    {
        return this.length_;
    }

    ///
    alias opDollar = length;

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Piscis primuin a capite foetat.");
        assert(s.length == 31);
        assert(s[$ - 1] == '.');
    }

    /**
     * Params:
     *  pos = Position.
     *
     * Returns: Byte at $(D_PARAM pos).
     *
     * Precondition: $(D_INLINECODE length > pos).
     */
    ref inout(char) opIndex(const size_t pos) inout @nogc nothrow pure @trusted
    in
    {
        assert(length > pos);
    }
    do
    {
        return *(this.data + pos);
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Alea iacta est.");
        assert(s[0] == 'A');
        assert(s[4] == ' ');
    }

    /**
     * Returns: Random access range that iterates over the string by bytes, in
     *          forward order.
     */
    ByCodeUnit!char opIndex() @nogc nothrow pure @trusted
    {
        return typeof(return)(this, this.data, this.data + length);
    }

    /// ditto
    ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc
    {
        return typeof(return)(this, this.data, this.data + length);
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Plutarchus");
        auto r = s[];
        assert(r.front == 'P');
        assert(r.back == 's');

        r.popFront();
        assert(r.front == 'l');
        assert(r.back == 's');

        r.popBack();
        assert(r.front == 'l');
        assert(r.back == 'u');

        assert(r.length == 8);
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = const String("Was ich vermag, soll gern geschehen. Goethe");
        auto r1 = s[];
        assert(r1.front == 'W');

        auto r2 = r1[];
        r1.popFront();
        assert(r1.front == 'a');
        assert(r2.front == 'W');
    }

    /**
     * Returns: Forward range that iterates over the string by code points.
     */
    ByCodePoint!char byCodePoint() @nogc nothrow pure @trusted
    {
        return typeof(return)(this, this.data, this.data + length);
    }

    /// ditto
    ByCodePoint!(const char) byCodePoint() const @nogc nothrow pure @trusted
    {
        return typeof(return)(this, this.data, this.data + length);
    }

    ///
    @nogc pure @safe unittest
    {
        auto s = String("Мне есть, что спеть, представ перед Всевышним.");
        auto cp = s.byCodePoint();
        assert(cp.front == 'М');
        cp.popFront();
        assert(cp.front == 'н');

        s = String("€");
        cp = s.byCodePoint();
        assert(cp.front == '€');
        assert(s.length == 3);

        s = String("\U00024B62");
        cp = s.byCodePoint();
        assert(cp.front == '\U00024B62');
        assert(s.length == 4);
    }

    ///
    @nogc pure @safe unittest
    {
        auto s = const String("Высоцкий");
        auto cp1 = s.byCodePoint();
        assert(cp1.front == 'В');

        auto cp2 = cp1[];
        cp1.popFront();
        assert(cp1.front == 'ы');
        assert(cp2.front == 'В');

        cp2 = cp1.save();
        cp1.popFront();
        assert(cp1.front == 'с');
        assert(cp2.front == 'ы');
    }

    /**
     * Returns whether the string is empty.
     *
     * Returns: $(D_KEYWORD true) if the string is empty, $(D_KEYWORD false)
     *          otherwise.
     */
    @property bool empty() const @nogc nothrow pure @safe
    {
        return length == 0;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        String s;
        assert(s.empty);

        s.insertBack('K');
        assert(!s.empty);
    }

    /**
     * Params:
     *  i = Slice start.
     *  j = Slice end.
     *
     * Returns: A range that iterates over the string by bytes from
     *          index $(D_PARAM i) up to (excluding) index $(D_PARAM j).
     *
     * Precondition: $(D_INLINECODE i <= j && j <= length).
     */
    ByCodeUnit!char opSlice(const size_t i, const size_t j)
    @nogc nothrow pure @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        return typeof(return)(this, this.data + i, this.data + j);
    }

    /// ditto
    ByCodeUnit!(const char) opSlice(const size_t i, const size_t j)
    const @nogc nothrow pure @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        return typeof(return)(this, this.data + i, this.data + j);
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Vladimir Soloviev");
        auto r = s[9 .. $];

        assert(r.front == 'S');
        assert(r.back == 'v');

        r.popFront();
        r.popBack();
        assert(r.front == 'o');
        assert(r.back == 'e');

        r.popFront();
        r.popBack();
        assert(r.front == 'l');
        assert(r.back == 'i');

        r.popFront();
        r.popBack();
        assert(r.front == 'o');
        assert(r.back == 'v');

        r.popFront();
        r.popBack();
        assert(r.empty);
    }

    /**
     * Assigns another string.
     *
     * If $(D_PARAM that) is passed by value, it won't be copied, but moved.
     * This string will take the ownership over $(D_PARAM that)'s storage and
     * the allocator.
     *
     * If $(D_PARAM that) is passed by reference, it will be copied.
     *
     * Params:
     *  S    = Content type.
     *  that = The value should be assigned.
     *
     * Returns: $(D_KEYWORD this).
     */
    ref String opAssign(S)(S that)
    if (is(S == String))
    {
        // The previous content ends up in that and is released together
        // with it.
        swap(this.data, that.data);
        swap(this.length_, that.length_);
        swap(this.capacity_, that.capacity_);
        swap(this.allocator_, that.allocator_);
        return this;
    }

    /// ditto
    ref String opAssign(S)(ref S that) @trusted
    if (is(Unqual!S == String))
    {
        reserve(that.length);
        that.data[0 .. that.length].copy(this.data[0 .. that.length]);
        this.length_ = that.length;
        return this;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Черная, потом пропахшая выть!");
        s = String("Как мне тебя не ласкать, не любить?");
    }

    /**
     * Assigns a stringish range.
     *
     * Params:
     *  S    = String type.
     *  that = Initial string.
     *
     * Returns: $(D_KEYWORD this).
     *
     * Throws: $(D_PSYMBOL UTFException).
     */
    ref String opAssign(S)(S that)
    if (!isInfinite!S
     && isInputRange!S
     && isSomeChar!(ElementType!S))
    {
        // Drop the old content, but keep the already allocated storage.
        this.length_ = 0;
        insertBack(that);
        return this;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Оловом светится лужная голь...");
        s = "Грустная песня, ты - русская боль.";
        assert(s == "Грустная песня, ty - русская боль." || s == "Грустная песня, ты - русская боль.");
        assert(s == "Грустная песня, ты - русская боль.");
    }

    /**
     * Lexicographical comparison by code units.
     *
     * Params:
     *  S    = Right hand side type.
     *  that = The string to compare with.
     *
     * Returns: A positive number if $(D_KEYWORD this) is lexicographically
     *          greater than $(D_PARAM that), `0` if they are equal, a
     *          negative number otherwise.
     */
    int opCmp(S)(auto ref S that) const @trusted
    if (is(Unqual!S == String))
    {
        return cmp(this.data[0 .. length], that.data[0 .. that.length]);
    }

    /// ditto
    int opCmp(S)(ByCodeUnit!S that) const @trusted
    if (is(Unqual!S == char))
    {
        return cmp(this.data[0 .. length],
                   that.begin[0 .. that.end - that.begin]);
    }

    /// ditto
    int opCmp(S)(ByCodePoint!S that) const @trusted
    if (is(Unqual!S == char))
    {
        return cmp(this.data[0 .. length],
                   that.begin[0 .. that.end - that.begin]);
    }

    /// ditto
    int opCmp()(const char[] that) const @trusted
    {
        return cmp(this.data[0 .. length], that);
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        assert(String("Голубая кофта.") < String("Синие глаза."));
        assert(String("Никакой я правды") < String("милой не сказал")[]);
    }

    /**
     * Comparison for equality.
     *
     * Params:
     *  S    = Right hand side type.
     *  that = The string to compare with.
     *
     * Returns: $(D_KEYWORD true) if the strings are equal, $(D_KEYWORD false)
     *          otherwise.
     */
    bool opEquals(S)(auto ref S that) const @trusted
    if (is(Unqual!S == String))
    {
        return equal(this.data[0 .. length], that.data[0 .. that.length]);
    }

    /**
     * Comparison for equality.
     *
     * Params:
     *  that = Right hand side string range.
     *
     * Returns: $(D_KEYWORD true) if the string and the range are equal,
     *          $(D_KEYWORD false) otherwise.
     */
    bool opEquals(S)(ByCodeUnit!S that) const @trusted
    if (is(Unqual!S == char))
    {
        return equal(this.data[0 .. length],
                     that.begin[0 .. that.end - that.begin]);
    }

    /// ditto
    bool opEquals(S)(ByCodePoint!S that) const @trusted
    if (is(Unqual!S == char))
    {
        return equal(this.data[0 .. length],
                     that.begin[0 .. that.end - that.begin]);
    }

    /// ditto
    bool opEquals()(const char[] that) const @trusted
    {
        return equal(this.data[0 .. length], that);
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        assert(String("Милая спросила:") != String("Крутит ли метель?"));
        assert(String("Затопить бы печку,") != String("постелить постель.")[]);
        assert(const String("Я ответил милой:") != String("Нынче с высоты"));
        assert(String("Кто-то осыпает") != "белые цветы");
        assert(const String("Затопи ты печку,") != String("постели постель,")[]);

        auto s = const String("У меня на сердце");
        assert(s[] != String("без тебя метель."));
        assert(s == s);
        assert(s == s[]);
        assert(s == "У меня на сердце");
    }

    /**
     * Assigns a value to the character with the index $(D_PARAM pos).
     *
     * Params:
     *  value = Value.
     *  pos   = Position.
     *
     * Returns: Assigned value.
     *
     * Precondition: $(D_INLINECODE length > pos).
     */
    ref char opIndexAssign(const char value, const size_t pos)
    @nogc nothrow pure @safe
    {
        return opIndex(pos) = value;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("alea iacta est.");

        s[0] = 'A';
        assert(s[0] == 'A');
    }

    /**
     * Slicing assignment.
     *
     * Params:
     *  R     = $(D_KEYWORD char).
     *  value = Assigned character, range or string.
     *
     * Returns: Range over the string.
     *
     * Precondition: $(D_INLINECODE length == value.length).
     */
    ByCodeUnit!char opIndexAssign(R)(ByCodeUnit!R value)
    if (is(Unqual!R == char))
    {
        return opSliceAssign(value, 0, length);
    }

    /// ditto
    ByCodeUnit!char opIndexAssign(const char value) @nogc nothrow pure @safe
    {
        return opSliceAssign(value, 0, length);
    }

    /// ditto
    ByCodeUnit!char opIndexAssign(const char[] value) @nogc nothrow pure @safe
    {
        return opSliceAssign(value, 0, length);
    }

    /**
     * Remove all characters belonging to $(D_PARAM r).
     *
     * Params:
     *  R = $(D_PSYMBOL ByCodeUnit) or $(D_PSYMBOL ByCodePoint).
     *  r = Range originally obtained from this string.
     *
     * Returns: A range spanning the remaining characters in the string that
     *          initially were right after $(D_PARAM r).
     *
     * Precondition: $(D_PARAM r) refers to a region of $(D_KEYWORD this).
     */
    R remove(R)(R r) @trusted
    if (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char))
    in
    {
        assert(r.container is &this);
        assert(r.begin >= this.data);
        assert(r.end <= this.data + length);
    }
    do
    {
        auto end = this.data + this.length;
        // Move everything behind r to the position where r starts.
        copy(ByCodeUnit!char(this, r.end, end), ByCodeUnit!char(this, r.begin, end));
        this.length_ = length - (r.end - r.begin);
        return R(this, r.begin, this.data + length);
    }

    ///
    @nogc pure @safe unittest
    {
        import std.algorithm.searching : count;

        auto s = String("Из пословицы слова не выкинешь.");

        assert(s.remove(s[5 .. 24]).length == 33);
        assert(s == "Из слова не выкинешь.");
        assert(s.length == 38);

        auto byCodePoint = s.byCodePoint();
        popFrontN(byCodePoint, 8);

        assert(s.remove(byCodePoint).count == 0);
        assert(s == "Из слова");

        assert(s.remove(s[]).length == 0);
        assert(s.length == 0);

        assert(s.remove(s[]).length == 0);
    }

    /**
     * Inserts $(D_PARAM el) before or after $(D_PARAM r).
     *
     * Params:
     *  R  = $(D_PSYMBOL ByCodeUnit) or $(D_PSYMBOL ByCodePoint).
     *  T  = Stringish type.
     *  r  = Range originally obtained from this string.
     *  el = Value(s) should be inserted.
     *
     * Returns: The number of elements inserted.
     *
     * Precondition: $(D_PARAM r) refers to a region of $(D_KEYWORD this).
     */
    size_t insertAfter(T, R)(R r, T el) @trusted
    if ((isSomeChar!T || (!isInfinite!T
     && isInputRange!T
     && isSomeChar!(ElementType!T)))
     && (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char)))
    in
    {
        assert(r.container is &this);
        assert(r.begin >= this.data);
        assert(r.end <= this.data + length);
    }
    do
    {
        const oldLength = length;
        // Remember the offset, since insertBack can reallocate the storage.
        const after = r.end - this.data;
        const inserted = insertBack(el);

        // Rotate the appended part in front of the old tail.
        bringToFront(this.data[after .. oldLength], this.data[oldLength .. length]);
        return inserted;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Казнить нельзя помиловать.");
        s.insertAfter(s[0 .. 27], ",");
        assert(s == "Казнить нельзя, помиловать.");

        s = String("Казнить нельзя помиловать.");
        s.insertAfter(s[0 .. 14], ',');
        assert(s == "Казнить, нельзя помиловать.");
    }

    ///
    size_t insertBefore(T, R)(R r, T el) @trusted
    if ((isSomeChar!T || (!isInfinite!T
     && isInputRange!T
     && isSomeChar!(ElementType!T)))
     && (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char)))
    in
    {
        assert(r.container is &this);
        assert(r.begin >= this.data);
        assert(r.end <= this.data + length);
    }
    do
    {
        // Inserting before r is inserting after everything that precedes r.
        return insertAfter(R(this, this.data, r.begin), el);
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        auto s = String("Казнить нельзя помиловать.");
        s.insertBefore(s[27 .. $], ",");
        assert(s == "Казнить нельзя, помиловать.");

        s = String("Казнить нельзя помиловать.");
        s.insertBefore(s[14 .. $], ',');
        assert(s == "Казнить, нельзя помиловать.");
    }

    /**
     * Calculates the hash value for the string.
     *
     * Returns: Hash value for the string.
     */
    size_t toHash() const @nogc nothrow pure @safe
    {
        return hash(get);
    }

    mixin DefaultAllocator;
}