// SPDX-License-Identifier: APACHE-2.0 pragma solidity >=0.8.0 <0.9.0; /* * @title String & Slice utility library for Solidity contracts. * @author Nick Johnson * * @dev Functionality in this library is largely implemented using an * abstraction called a 'Slice'. A Slice represents a part of a string - * anything from the entire string to a single character, or even no * characters at all (a 0-length Slice). Since a Slice only has to specify * an offset and a length, copying and manipulating slices is a lot less * expensive than copying and manipulating the strings they reference. * * To further reduce gas costs, most functions on Slice that need to return * a Slice modify the original one instead of allocating a new one; for * instance, `s.split(".")` will return the text up to the first '.', * modifying s to only contain the remainder of the string after the '.'. * In situations where you do not want to modify the original Slice, you * can make a copy first with `.copy()`, for example: * `s.copy().split(".")`. Try and avoid using this idiom in loops; since * Solidity has no memory management, it will result in allocating many * short-lived slices that are later discarded. * * Functions that return two slices come in two versions: a non-allocating * version that takes the second Slice as an argument, modifying it in * place, and an allocating version that allocates and returns the second * Slice; see `nextRune` for example. * * Functions that have to copy string data will return strings rather than * slices; these can be cast back to slices for further processing if * required. * * For convenience, some functions are provided with non-modifying * variants that create a new Slice and return both; for instance, * `s.splitNew('.')` leaves s unmodified, and returns two values * corresponding to the left and right parts of the string. */ library Slices { struct Slice { uint _len; uint _ptr; } function _memcpy(uint _dest, uint _src, uint _len) private pure { // Copy word-length chunks while possible for(; _len >= 32; _len -= 32) { assembly { mstore(_dest, mload(_src)) } _dest += 32; _src += 32; } // Copy remaining bytes uint _mask = type(uint).max; if (_len > 0) { _mask = 256 ** (32 - _len) - 1; } assembly { let srcpart := and(mload(_src), not(_mask)) let destpart := and(mload(_dest), _mask) mstore(_dest, or(destpart, srcpart)) } } /* * @dev Returns a Slice containing the entire string. * @param self The string to make a Slice from. * @return A newly allocated Slice containing the entire string. */ function toSlice(string memory self) internal pure returns (Slice memory) { uint ptr; assembly { ptr := add(self, 0x20) } return Slice(bytes(self).length, ptr); } /* * @dev Returns the length of a null-terminated bytes32 string. * @param self The value to find the length of. * @return The length of the string, from 0 to 32. */ function len(bytes32 self) internal pure returns (uint) { uint ret; if (self == 0) return 0; if (uint(self) & type(uint128).max == 0) { ret += 16; self = bytes32(uint(self) / 0x100000000000000000000000000000000); } if (uint(self) & type(uint64).max == 0) { ret += 8; self = bytes32(uint(self) / 0x10000000000000000); } if (uint(self) & type(uint32).max == 0) { ret += 4; self = bytes32(uint(self) / 0x100000000); } if (uint(self) & type(uint16).max == 0) { ret += 2; self = bytes32(uint(self) / 0x10000); } if (uint(self) & type(uint8).max == 0) { ret += 1; } return 32 - ret; } /* * @dev Returns a Slice containing the entire bytes32, interpreted as a * null-terminated utf-8 string. * @param self The bytes32 value to convert to a Slice. * @return A new Slice containing the value of the input argument up to the * first null. */ function toSliceB32(bytes32 self) internal pure returns (Slice memory ret) { // Allocate space for `self` in memory, copy it there, and point ret at it assembly { let ptr := mload(0x40) mstore(0x40, add(ptr, 0x20)) mstore(ptr, self) mstore(add(ret, 0x20), ptr) } ret._len = len(self); } /* * @dev Returns a new Slice containing the same data as the current Slice. * @param self The Slice to copy. * @return A new Slice containing the same data as `self`. */ function copy(Slice memory self) internal pure returns (Slice memory) { return Slice(self._len, self._ptr); } /* * @dev Copies a Slice to a new string. * @param self The Slice to copy. * @return A newly allocated string containing the Slice's text. */ function toString(Slice memory self) internal pure returns (string memory) { string memory ret = new string(self._len); uint retptr; assembly { retptr := add(ret, 32) } _memcpy(retptr, self._ptr, self._len); return ret; } /* * @dev Returns the length in runes of the Slice. Note that this operation * takes time proportional to the length of the Slice; avoid using it * in loops, and call `Slice.empty()` if you only need to know whether * the Slice is empty or not. * @param self The Slice to operate on. * @return The length of the Slice in runes. */ function len(Slice memory self) internal pure returns (uint _l) { // Starting at ptr-31 means the LSB will be the byte we care about uint ptr = self._ptr - 31; uint end = ptr + self._len; for (_l = 0; ptr < end; _l++) { uint8 b; assembly { b := and(mload(ptr), 0xFF) } if (b < 0x80) { ptr += 1; } else if(b < 0xE0) { ptr += 2; } else if(b < 0xF0) { ptr += 3; } else if(b < 0xF8) { ptr += 4; } else if(b < 0xFC) { ptr += 5; } else { ptr += 6; } } } /* * @dev Returns true if the Slice is empty (has a length of 0). * @param self The Slice to operate on. * @return True if the Slice is empty, False otherwise. */ function empty(Slice memory self) internal pure returns (bool) { return self._len == 0; } /* * @dev Returns a positive number if `other` comes lexicographically after * `self`, a negative number if it comes before, or zero if the * contents of the two slices are equal. Comparison is done per-rune, * on unicode codepoints. * @param self The first Slice to compare. * @param other The second Slice to compare. * @return The result of the comparison. */ function compare(Slice memory self, Slice memory other) internal pure returns (int) { uint shortest = self._len; if (other._len < self._len) shortest = other._len; uint selfptr = self._ptr; uint otherptr = other._ptr; for (uint idx = 0; idx < shortest; idx += 32) { uint a; uint b; assembly { a := mload(selfptr) b := mload(otherptr) } if (a != b) { // Mask out irrelevant bytes and check again uint mask = type(uint).max; // 0xffff... if(shortest < 32) { mask = ~(2 ** (8 * (32 - shortest + idx)) - 1); } unchecked { uint diff = (a & mask) - (b & mask); if (diff != 0) return int(diff); } } selfptr += 32; otherptr += 32; } return int(self._len) - int(other._len); } /* * @dev Returns true if the two slices contain the same text. * @param self The first Slice to compare. * @param self The second Slice to compare. * @return True if the slices are equal, false otherwise. */ function equals(Slice memory self, Slice memory other) internal pure returns (bool) { return compare(self, other) == 0; } /* * @dev Extracts the first rune in the Slice into `rune`, advancing the * Slice to point to the next rune and returning `self`. * @param self The Slice to operate on. * @param rune The Slice that will contain the first rune. * @return `rune`. */ function nextRune(Slice memory self, Slice memory rune) internal pure returns (Slice memory) { rune._ptr = self._ptr; if (self._len == 0) { rune._len = 0; return rune; } uint _l; uint _b; // Load the first byte of the rune into the LSBs of _b assembly { _b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) } if (_b < 0x80) { _l = 1; } else if(_b < 0xE0) { _l = 2; } else if(_b < 0xF0) { _l = 3; } else { _l = 4; } // Check for truncated codepoints if (_l > self._len) { rune._len = self._len; self._ptr += self._len; self._len = 0; return rune; } self._ptr += _l; self._len -= _l; rune._len = _l; return rune; } /* * @dev Returns the first rune in the Slice, advancing the Slice to point * to the next rune. * @param self The Slice to operate on. * @return A Slice containing only the first rune from `self`. */ function nextRune(Slice memory self) internal pure returns (Slice memory ret) { nextRune(self, ret); } /* * @dev Returns the number of the first codepoint in the Slice. * @param self The Slice to operate on. * @return The number of the first codepoint in the Slice. */ function ord(Slice memory self) internal pure returns (uint ret) { if (self._len == 0) { return 0; } uint word; uint length; uint divisor = 2 ** 248; // Load the rune into the MSBs of b assembly { word:= mload(mload(add(self, 32))) } uint b = word / divisor; if (b < 0x80) { ret = b; length = 1; } else if(b < 0xE0) { ret = b & 0x1F; length = 2; } else if(b < 0xF0) { ret = b & 0x0F; length = 3; } else { ret = b & 0x07; length = 4; } // Check for truncated codepoints if (length > self._len) { return 0; } for (uint i = 1; i < length; i++) { divisor = divisor / 256; b = (word / divisor) & 0xFF; if (b & 0xC0 != 0x80) { // Invalid UTF-8 sequence return 0; } ret = (ret * 64) | (b & 0x3F); } return ret; } /* * @dev Returns the keccak-256 hash of the Slice. * @param self The Slice to hash. * @return The hash of the Slice. */ function keccak(Slice memory self) internal pure returns (bytes32 ret) { assembly { ret := keccak256(mload(add(self, 32)), mload(self)) } } /* * @dev Returns true if `self` starts with `needle`. * @param self The Slice to operate on. * @param needle The Slice to search for. * @return True if the Slice starts with the provided text, false otherwise. */ function startsWith(Slice memory self, Slice memory needle) internal pure returns (bool) { if (self._len < needle._len) { return false; } if (self._ptr == needle._ptr) { return true; } bool equal; assembly { let length := mload(needle) let selfptr := mload(add(self, 0x20)) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length)) } return equal; } /* * @dev If `self` starts with `needle`, `needle` is removed from the * beginning of `self`. Otherwise, `self` is unmodified. * @param self The Slice to operate on. * @param needle The Slice to search for. * @return `self` */ function beyond(Slice memory self, Slice memory needle) internal pure returns (Slice memory) { if (self._len < needle._len) { return self; } bool equal = true; if (self._ptr != needle._ptr) { assembly { let length := mload(needle) let selfptr := mload(add(self, 0x20)) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length)) } } if (equal) { self._len -= needle._len; self._ptr += needle._len; } return self; } /* * @dev Returns true if the Slice ends with `needle`. * @param self The Slice to operate on. * @param needle The Slice to search for. * @return True if the Slice starts with the provided text, false otherwise. */ function endsWith(Slice memory self, Slice memory needle) internal pure returns (bool) { if (self._len < needle._len) { return false; } uint selfptr = self._ptr + self._len - needle._len; if (selfptr == needle._ptr) { return true; } bool equal; assembly { let length := mload(needle) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length)) } return equal; } /* * @dev If `self` ends with `needle`, `needle` is removed from the * end of `self`. Otherwise, `self` is unmodified. * @param self The Slice to operate on. * @param needle The Slice to search for. * @return `self` */ function until(Slice memory self, Slice memory needle) internal pure returns (Slice memory) { if (self._len < needle._len) { return self; } uint selfptr = self._ptr + self._len - needle._len; bool equal = true; if (selfptr != needle._ptr) { assembly { let length := mload(needle) let needleptr := mload(add(needle, 0x20)) equal := eq(keccak256(selfptr, length), keccak256(needleptr, length)) } } if (equal) { self._len -= needle._len; } return self; } // Returns the memory address of the first byte of the first occurrence of // `needle` in `self`, or the first byte after `self` if not found. function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private pure returns (uint) { uint ptr = selfptr; uint idx; if (needlelen <= selflen) { if (needlelen <= 32) { bytes32 mask; if (needlelen > 0) { mask = bytes32(~(2 ** (8 * (32 - needlelen)) - 1)); } bytes32 needledata; assembly { needledata := and(mload(needleptr), mask) } uint end = selfptr + selflen - needlelen; bytes32 ptrdata; assembly { ptrdata := and(mload(ptr), mask) } while (ptrdata != needledata) { if (ptr >= end) return selfptr + selflen; ptr++; assembly { ptrdata := and(mload(ptr), mask) } } return ptr; } else { // For long needles, use hashing bytes32 hash; assembly { hash := keccak256(needleptr, needlelen) } for (idx = 0; idx <= selflen - needlelen; idx++) { bytes32 testHash; assembly { testHash := keccak256(ptr, needlelen) } if (hash == testHash) return ptr; ptr += 1; } } } return selfptr + selflen; } // Returns the memory address of the first byte after the last occurrence of // `needle` in `self`, or the address of `self` if not found. function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private pure returns (uint) { uint ptr; if (needlelen <= selflen) { if (needlelen <= 32) { bytes32 mask; if (needlelen > 0) { mask = bytes32(~(2 ** (8 * (32 - needlelen)) - 1)); } bytes32 needledata; assembly { needledata := and(mload(needleptr), mask) } ptr = selfptr + selflen - needlelen; bytes32 ptrdata; assembly { ptrdata := and(mload(ptr), mask) } while (ptrdata != needledata) { if (ptr <= selfptr) return selfptr; ptr--; assembly { ptrdata := and(mload(ptr), mask) } } return ptr + needlelen; } else { // For long needles, use hashing bytes32 hash; assembly { hash := keccak256(needleptr, needlelen) } ptr = selfptr + (selflen - needlelen); while (ptr >= selfptr) { bytes32 testHash; assembly { testHash := keccak256(ptr, needlelen) } if (hash == testHash) return ptr + needlelen; ptr -= 1; } } } return selfptr; } /* * @dev Modifies `self` to contain everything from the first occurrence of * `needle` to the end of the Slice. `self` is set to the empty Slice * if `needle` is not found. * @param self The Slice to search and modify. * @param needle The text to search for. * @return `self`. */ function find(Slice memory self, Slice memory needle) internal pure returns (Slice memory) { uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); self._len -= ptr - self._ptr; self._ptr = ptr; return self; } /* * @dev Modifies `self` to contain the part of the string from the start of * `self` to the end of the first occurrence of `needle`. If `needle` * is not found, `self` is set to the empty Slice. * @param self The Slice to search and modify. * @param needle The text to search for. * @return `self`. */ function rfind(Slice memory self, Slice memory needle) internal pure returns (Slice memory) { uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); self._len = ptr - self._ptr; return self; } /* * @dev Splits the Slice, setting `self` to everything after the first * occurrence of `needle`, and `token` to everything before it. If * `needle` does not occur in `self`, `self` is set to the empty Slice, * and `token` is set to the entirety of `self`. * @param self The Slice to split. * @param needle The text to search for in `self`. * @param token An output parameter to which the first token is written. * @return `token`. */ function split(Slice memory self, Slice memory needle, Slice memory token) internal pure returns (Slice memory) { uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); token._ptr = self._ptr; token._len = ptr - self._ptr; if (ptr == self._ptr + self._len) { // Not found self._len = 0; } else { self._len -= token._len + needle._len; self._ptr = ptr + needle._len; } return token; } /* * @dev Splits the Slice, setting `self` to everything after the first * occurrence of `needle`, and returning everything before it. If * `needle` does not occur in `self`, `self` is set to the empty Slice, * and the entirety of `self` is returned. * @param self The Slice to split. * @param needle The text to search for in `self`. * @return The part of `self` up to the first occurrence of `delim`. */ function split(Slice memory self, Slice memory needle) internal pure returns (Slice memory token) { split(self, needle, token); } /* * @dev Splits the Slice, setting `self` to everything before the last * occurrence of `needle`, and `token` to everything after it. If * `needle` does not occur in `self`, `self` is set to the empty Slice, * and `token` is set to the entirety of `self`. * @param self The Slice to split. * @param needle The text to search for in `self`. * @param token An output parameter to which the first token is written. * @return `token`. */ function rsplit(Slice memory self, Slice memory needle, Slice memory token) internal pure returns (Slice memory) { uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); token._ptr = ptr; token._len = self._len - (ptr - self._ptr); if (ptr == self._ptr) { // Not found self._len = 0; } else { self._len -= token._len + needle._len; } return token; } /* * @dev Splits the Slice, setting `self` to everything before the last * occurrence of `needle`, and returning everything after it. If * `needle` does not occur in `self`, `self` is set to the empty Slice, * and the entirety of `self` is returned. * @param self The Slice to split. * @param needle The text to search for in `self`. * @return The part of `self` after the last occurrence of `delim`. */ function rsplit(Slice memory self, Slice memory needle) internal pure returns (Slice memory token) { rsplit(self, needle, token); } /* * @dev Counts the number of nonoverlapping occurrences of `needle` in `self`. * @param self The Slice to search. * @param needle The text to search for in `self`. * @return The number of occurrences of `needle` found in `self`. */ function count(Slice memory self, Slice memory needle) internal pure returns (uint cnt) { uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len; while (ptr <= self._ptr + self._len) { cnt++; ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len; } } /* * @dev Returns True if `self` contains `needle`. * @param self The Slice to search. * @param needle The text to search for in `self`. * @return True if `needle` is found in `self`, false otherwise. */ function contains(Slice memory self, Slice memory needle) internal pure returns (bool) { return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr; } /* * @dev Returns a newly allocated string containing the concatenation of * `self` and `other`. * @param self The first Slice to concatenate. * @param other The second Slice to concatenate. * @return The concatenation of the two strings. */ function concat(Slice memory self, Slice memory other) internal pure returns (string memory) { string memory ret = new string(self._len + other._len); uint retptr; assembly { retptr := add(ret, 32) } _memcpy(retptr, self._ptr, self._len); _memcpy(retptr + self._len, other._ptr, other._len); return ret; } /* * @dev Joins an array of slices, using `self` as a delimiter, returning a * newly allocated string. * @param self The delimiter to use. * @param parts A list of slices to join. * @return A newly allocated string containing all the slices in `parts`, * joined with `self`. */ function join(Slice memory self, Slice[] memory parts) internal pure returns (string memory) { if (parts.length == 0) return ""; uint length = self._len * (parts.length - 1); for(uint i = 0; i < parts.length; i++) length += parts[i]._len; string memory ret = new string(length); uint retptr; assembly { retptr := add(ret, 32) } for(uint i = 0; i < parts.length; i++) { _memcpy(retptr, parts[i]._ptr, parts[i]._len); retptr += parts[i]._len; if (i < parts.length - 1) { _memcpy(retptr, self._ptr, self._len); retptr += self._len; } } return ret; } }