UNPKG

kuvio

Version:

Create string patterns and derive things from them, such as regexes

980 lines (963 loc) 22.8 kB
import { __export } from "./chunk-QXAXOUZS.mjs";
import { match } from "@simspace/matchers";
import * as O from "fp-ts/Option";
import * as RA from "fp-ts/ReadonlyArray";
import * as RNEA from "fp-ts/ReadonlyNonEmptyArray";
import { pipe } from "fp-ts/function";

// ---------------------------------------------------------------------------
// src/base.ts
// ---------------------------------------------------------------------------

/**
 * Atom matching one literal character.
 * @param {string} c - the character to match
 */
const char = (c) => ({ tag: "atom", kind: "character", char: c });

/** Atom rendered as `.` in the generated regex. */
const anything = { tag: "atom", kind: "anything" };

/** Converts a one-character string to its char code; numbers pass through. */
const toCharCode = (c) => (typeof c === "string" ? c.charCodeAt(0) : c);

/**
 * Normalises range specs into `{ lower, upper }` char-code pairs.
 * A spec is either a string (its first character is used for both bounds)
 * or a `[from, to]` pair whose entries are strings or numeric char codes.
 */
const convertRanges = RA.map((range) => {
  if (typeof range === "string") {
    const code = range.charCodeAt(0);
    return { lower: code, upper: code };
  }
  const [from, to] = range;
  return { lower: toCharCode(from), upper: toCharCode(to) };
});

/**
 * Builds a character-class atom.
 * @param {boolean} exclude - when true the class is negated
 * @param {...(string | [string | number, string | number])} ranges - range specs
 */
const characterClass = (exclude, ...ranges) => ({
  tag: "atom",
  kind: "characterClass",
  exclude,
  ranges: convertRanges(ranges),
});

/** Wraps a pattern in a group atom; atoms are returned unchanged. */
const subgroup = (subpattern) =>
  subpattern.tag === "atom"
    ? subpattern
    : { tag: "atom", kind: "subgroup", subpattern };

/**
 * Quantifier: zero or more repetitions (`*`, or `*?` when not greedy).
 * @param {{ greedy: boolean }} [opts] - defaults to non-greedy
 */
const anyNumber =
  (opts = { greedy: false }) =>
  (atom) => ({
    tag: "quantifiedAtom",
    atom,
    greedy: opts.greedy,
    kind: "star",
  });

/**
 * Quantifier: one or more repetitions (`+`, or `+?` when not greedy).
 * @param {{ greedy: boolean }} [opts] - defaults to non-greedy
 */
const atLeastOne =
  (opts = { greedy: false }) =>
  (atom) => ({
    tag: "quantifiedAtom",
    atom,
    greedy: opts.greedy,
    kind: "plus",
  });

/** Quantifier: zero or one occurrence (rendered as `?`). */
const maybe = (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: false,
  kind: "question",
});

/** Quantifier: exactly `count` repetitions (`{count}`). */
const times = (count) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "exactly",
  count,
});

/** Alias of {@link times}. */
const exactly = times;

/**
 * Quantifier: at least `min` repetitions (`{min,}`).
 * Carries `greedy: true` so its shape agrees with the other quantifiers;
 * the renderer emits the greedy form for this kind.
 */
const atLeast = (min) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "minimum",
  min,
});

/** Quantifier: between `min` and `max` repetitions (`{min,max}`). */
const between = (min, max) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "between",
  min,
  max,
});

/**
 * Quantifier: at most `max` repetitions (`{0,max}`).
 * Carries `greedy: true` for the same reason as {@link atLeast}.
 */
const atMost = (max) => (atom) => ({
  tag: "quantifiedAtom",
  atom,
  greedy: true,
  kind: "between",
  min: 0,
  max,
});

/** Pipeable alternation: `pipe(left, or(right))` yields `left|right`. */
const or = (right) => (left) => ({ tag: "disjunction", left, right });

/**
 * Flattens a term sequence, atom or quantified atom into a list of terms.
 * No `disjunction` case is registered, so alternations must be wrapped with
 * `subgroup` before being passed to `then`.
 */
const getTerms = match.w({
  termSequence: ({ terms }) => terms,
  atom: (atom) => [atom],
  quantifiedAtom: (qatom) => [qatom],
});

/** Pipeable concatenation: `pipe(a, then(b))` yields the sequence `ab`. */
const then = (term) => (alt) => ({
  tag: "termSequence",
  terms: [...getTerms(alt), ...getTerms(term)],
});

/** Group matching the string `s` literally, one `char` atom per UTF-16 unit. */
const exactString = (s) =>
  subgroup({
    tag: "termSequence",
    terms: s.split("").map((c) => char(c)),
  });

/** Term sequence made of the given terms, in order. */
const sequence = (term, ...terms) => ({
  tag: "termSequence",
  terms: [term, ...terms],
});

/**
 * Pipeable union of character classes. Accepts either range specs
 * (strings / pairs, as for `characterClass`) or another character class,
 * whose ranges are appended. The `exclude` flag of the piped class is kept.
 */
const and =
  (first, ...addl) =>
  (cc) => ({
    tag: "atom",
    kind: "characterClass",
    exclude: cc.exclude,
    ranges: cc.ranges.concat(
      typeof first === "string" || Array.isArray(first)
        ? convertRanges([first, ...addl])
        : first.ranges,
    ),
  });

/** Returns a copy of the character class with its `exclude` flag inverted. */
const non = (cc) => ({ ...cc, exclude: !cc.exclude });

/** Atom that renders to the empty string. */
const empty = { tag: "atom", kind: "character", char: "" };

// ---------------------------------------------------------------------------
// src/character-classes.ts
// ---------------------------------------------------------------------------

const upper = characterClass(false, ["A", "Z"]);
const lower = characterClass(false, ["a", "z"]);
const alpha = pipe(upper, and(lower));
const digit = characterClass(false, ["0", "9"]);
const xdigit = pipe(digit, and(["A", "F"], ["a", "f"]));
const hexDigit = xdigit;
const alnum = pipe(alpha, and(digit));
const word = pipe(alnum, and("_"));
const punct = characterClass(
  false,
  ["!", "/"],
  [":", "@"],
  ["[", "_"],
  ["{", "~"],
);
// Space or horizontal tab. The second entry was a duplicated space, which is
// redundant; a tab is assumed to be the intended member (POSIX [:blank:]).
const blank = characterClass(false, " ", "\t");
const space = pipe(blank, and("\n", "\r", "\f", "\v"));
// NOTE(review): the upper bound 127 is DEL; POSIX [:graph:] ends at 126 — confirm intent.
const graph = characterClass(false, [33, 127]);
const print = pipe(graph, and(" "));

// ---------------------------------------------------------------------------
// src/combinators.ts
// ---------------------------------------------------------------------------

/** Alternation of all given patterns, folded left to right with `or`. */
const oneOf = (pattern, ...patterns) =>
  pipe(
    patterns,
    RA.reduce(pattern, (ored, next) => pipe(ored, or(next))),
  );

/**
 * Recursive worker for `integerRange`, operating on decimal digit strings.
 * `integerRange` calls it with `min` left-padded with zeros to the length
 * of `max`.
 *
 * @param {string} min - lower bound digits
 * @param {string} max - upper bound digits
 * @param {boolean} [omitInitialZeros] - when true, a leading `0` digit of
 *   `min` is not emitted, so numbers shorter than `max` match unpadded
 */
const integerRange_ = (min, max, omitInitialZeros = false) => {
  const curMinDigit = Number(min[0] ?? "0");
  const restMin = min.slice(1);
  const curMaxDigit = Number(max[0] ?? "9");
  const restMax = max.slice(1);

  // Last digit: a single character or a simple digit range.
  if (restMin.length === 0) {
    return curMinDigit === curMaxDigit
      ? char(min)
      : characterClass(false, [min, max]);
  }

  // Same leading digit: emit it and recurse on the remaining digits.
  if (curMinDigit === curMaxDigit) {
    return pipe(
      char(curMinDigit.toString(10)),
      then(subgroup(integerRange_(restMin, restMax))),
    );
  }

  // Lowest leading digit: the remainder runs from restMin up to 9…9.
  const lowBranch =
    curMinDigit === 0 && omitInitialZeros
      ? integerRange_(restMin, restMax.replace(/./g, "9"), true)
      : pipe(
          char(curMinDigit.toString(10)),
          then(subgroup(integerRange_(restMin, restMin.replace(/./g, "9")))),
        );

  // Leading digits strictly between the bounds: any remaining digits.
  const middleBranch =
    curMaxDigit - curMinDigit > 1
      ? [
          pipe(
            characterClass(false, [
              (curMinDigit + 1).toString(10),
              (curMaxDigit - 1).toString(10),
            ]),
            then(sequence(empty, ...restMin.split("").map(() => digit))),
          ),
        ]
      : [];

  // Highest leading digit: the remainder runs from 0…0 up to restMax.
  const highBranch = pipe(
    char(curMaxDigit.toString(10)),
    then(subgroup(integerRange_(restMin.replace(/./g, "0"), restMax))),
  );

  return oneOf(lowBranch, ...middleBranch, highBranch);
};

/**
 * Pattern matching the decimal representation of every integer in
 * `[min, max]`.
 *
 * @param {number} min - lower bound (non-negative integer)
 * @param {number} max - upper bound (non-negative integer, >= min)
 * @returns the pattern, or `empty` when the bounds are NaN, non-integer,
 *   negative, or `min > max`
 */
const integerRange = (min, max) => {
  const isInvalid =
    min > max ||
    Number.isNaN(min) ||
    Number.isNaN(max) ||
    !Number.isInteger(min) ||
    !Number.isInteger(max) ||
    min < 0 ||
    max < 0;
  if (isInvalid) {
    return empty;
  }
  const maxStr = max.toString(10);
  const minStr = min.toString(10).padStart(maxStr.length, "0");
  return integerRange_(minStr, maxStr, true);
};

// ---------------------------------------------------------------------------
// src/regex.ts
// ---------------------------------------------------------------------------

const matchK = match.on("kind").w;

/**
 * Renders a char code for use inside a `[...]` character class.
 *
 * Escaped as `\xHH` / `\uHHHH`:
 *   < 32   control characters
 *   45     '-'  (would otherwise read as a range operator)
 *   92     '\'  (would otherwise escape the following character)
 *   93     ']'  (would otherwise close the class)
 *   94     '^'  (would otherwise negate the class when first)
 *   >= 127 DEL and everything outside printable ASCII
 */
const repr = (n) => {
  const needsEscape =
    n < 32 || n === 45 || n === 92 || n === 93 || n === 94 || n >= 127;
  if (!needsEscape) {
    return String.fromCharCode(n);
  }
  const hex = n.toString(16);
  return n > 255 ? `\\u${hex.padStart(4, "0")}` : `\\x${hex.padStart(2, "0")}`;
};

/** Every character with syntactic meaning outside a character class. */
const REGEX_SYNTAX_CHARACTERS = /[\\^$.*+?()[\]{}|]/g;

/** Backslash-escapes regex syntax characters so `s` matches literally. */
const escapeLiteral = (s) => s.replace(REGEX_SYNTAX_CHARACTERS, "\\$&");

/** Renders a single range as `a` or `a-b`. */
const reprRange = ({ lower: from, upper: to }) =>
  from === to ? repr(from) : `${repr(from)}-${repr(to)}`;

/** Renders an atom to regex source text. */
const regexStringFromAtom = matchK({
  anything: () => ".",
  character: ({ char: literal }) => escapeLiteral(literal),
  characterClass: ({ exclude, ranges }) =>
    pipe(
      RNEA.fromReadonlyArray(ranges),
      O.chain(O.fromPredicate((rs) => rs.length === 1)),
      // A lone 0-9 range is shortened to \d, or \D when the class is negated.
      O.chain(([{ lower: from, upper: to }]) =>
        from === 48 && to === 57 ? O.some(exclude ? "\\D" : "\\d") : O.none,
      ),
      O.getOrElse(
        () => `[${exclude ? "^" : ""}${ranges.map(reprRange).join("")}]`,
      ),
    ),
  subgroup: ({ subpattern }) => `(${regexStringFromPattern(subpattern)})`,
});

/** Renders a quantified atom; only `star` and `plus` honour `greedy`. */
const regexStringFromQuantifiedAtom = matchK({
  star: ({ atom, greedy }) => `${regexStringFromAtom(atom)}*${greedy ? "" : "?"}`,
  plus: ({ atom, greedy }) => `${regexStringFromAtom(atom)}+${greedy ? "" : "?"}`,
  question: ({ atom }) => `${regexStringFromAtom(atom)}?`,
  exactly: ({ atom, count }) => `${regexStringFromAtom(atom)}{${count}}`,
  between: ({ atom, min, max }) => `${regexStringFromAtom(atom)}{${min},${max}}`,
  minimum: ({ atom, min }) => `${regexStringFromAtom(atom)}{${min},}`,
});

/** Renders one member of a term sequence. */
const regexStringFromTerm = match.w({
  atom: regexStringFromAtom,
  quantifiedAtom: regexStringFromQuantifiedAtom,
});

/** Renders any pattern node to regex source text. */
const regexStringFromPattern = match.w({
  atom: regexStringFromAtom,
  disjunction: ({ left, right }) =>
    `${regexStringFromPattern(left)}|${regexStringFromPattern(right)}`,
  quantifiedAtom: regexStringFromQuantifiedAtom,
  termSequence: ({ terms }) => terms.map(regexStringFromTerm).join(""),
});

/**
 * Compiles a pattern into a `RegExp` anchored at both ends (`^(...)$`).
 *
 * @param pattern - the pattern to compile
 * @param {boolean} [caseInsensitive] - adds the `i` flag when true
 * @returns {RegExp}
 */
const regexFromPattern = (pattern, caseInsensitive = false) =>
  new RegExp(
    `^(${regexStringFromPattern(pattern)})$`,
    caseInsensitive ? "i" : "",
  );

// ---------------------------------------------------------------------------
// src/patterns/index.ts
// ---------------------------------------------------------------------------

// Namespace object for the ready-made patterns; members are registered as
// lazy getters so they may be defined further down in this module.
const patterns_exports = {};
__export(patterns_exports, {
  anyUUID: () => anyUUID,
  base64: () => base64,
  base64Character: () => base64Character,
  base64Url: () => base64Url,
  creditCard: () => creditCard,
  emailAddress: () => emailAddress,
  hexColor: () => hexColor,
  hexadecimal: () => hexadecimal,
  hslColor: () => hslColor,
  jwt: () => jwt,
  latLong: () => latLong,
  rgbColor: () => rgbColor,
  rgbColorDecimal: () => rgbColorDecimal,
  rgbColorPercent: () => rgbColorPercent,
  rgbColorWithAlphaDecimal: () => rgbColorWithAlphaDecimal,
  rgbColorWithAlphaPercent: () => rgbColorWithAlphaPercent,
  uuidV1: () => uuidV1,
  uuidV2: () => uuidV2,
  uuidV3: () => uuidV3,
  uuidV4: () => uuidV4,
  uuidV5: () => uuidV5,
});

// ---------------------------------------------------------------------------
// src/patterns/base64.ts
// ---------------------------------------------------------------------------

const base64Character = pipe(alnum, and(characterClass(false, "+", "/")));

// Any number of 4-character groups, optionally followed by one padded group.
const base64 = pipe(
  base64Character,
  exactly(4),
  subgroup,
  anyNumber(),
  then(
    maybe(
      subgroup(
        oneOf(
          sequence(exactly(2)(base64Character), exactly(2)(char("="))),
          sequence(exactly(3)(base64Character), char("=")),
        ),
      ),
    ),
  ),
);

// ---------------------------------------------------------------------------
// src/patterns/base64url.ts
// ---------------------------------------------------------------------------

const base64Url = pipe(word, and("-"), anyNumber({ greedy: true }));

// ---------------------------------------------------------------------------
// src/patterns/credit-card.ts
// ---------------------------------------------------------------------------

const visa = pipe(
  char("4"),
  then(pipe(exactly(12)(digit), or(exactly(15)(digit)), subgroup)),
);

const mastercard = pipe(
  subgroup(
    pipe(
      sequence(char("5"), characterClass(false, ["1", "5"]), exactly(4)(digit)),
      or(
        sequence(
          exactString("222"),
          characterClass(false, ["1", "9"]),
          exactly(2)(digit),
        ),
      ),
      or(
        sequence(
          exactString("22"),
          characterClass(false, ["3", "9"]),
          exactly(3)(digit),
        ),
      ),
      or(
        sequence(
          exactString("2"),
          characterClass(false, ["3", "6"]),
          exactly(4)(digit),
        ),
      ),
      or(
        sequence(
          exactString("27"),
          characterClass(false, "0", "1"),
          exactly(3)(digit),
        ),
      ),
      or(sequence(exactString("2720"), exactly(2)(digit))),
    ),
  ),
  then(exactly(10)(digit)),
);

const amex = sequence(
  char("3"),
  characterClass(false, "4", "7"),
  exactly(13)(digit),
);

const dinersClub = pipe(
  sequence(
    char("3"),
    subgroup(
      pipe(
        sequence(
          char("0"),
          subgroup(
            pipe(
              sequence(characterClass(false, ["0", "5"]), exactly(5)(digit)),
              or(sequence(exactString("95"), exactly(4)(digit))),
            ),
          ),
        ),
        or(sequence(characterClass(false, "8", "9"), exactly(6)(digit))),
      ),
    ),
    between(8, 11)(digit),
  ),
  or(sequence(exactString("36"), exactly(6)(digit), between(6, 11)(digit))),
  subgroup,
);

const discover = pipe(
  oneOf(
    pipe(
      exactString("6011"),
      then(
        subgroup(
          oneOf(
            sequence(
              char("0"),
              characterClass(false, ["5", "9"]),
              exactly(2)(digit),
            ),
            sequence(characterClass(false, ["2", "4"]), exactly(3)(digit)),
            sequence(exactString("74"), exactly(2)(digit)),
            sequence(
              exactString("7"),
              characterClass(false, ["7", "9"]),
              exactly(2)(digit),
            ),
            sequence(
              exactString("8"),
              characterClass(false, ["6", "9"]),
              exactly(2)(digit),
            ),
            sequence(exactString("9"), exactly(3)(digit)),
          ),
        ),
      ),
    ),
    sequence(
      exactString("64"),
      characterClass(false, ["4", "9"]),
      exactly(5)(digit),
    ),
    sequence(
      exactString("650"),
      characterClass(false, ["0", "5"]),
      exactly(4)(digit),
    ),
    sequence(
      exactString("65060"),
      characterClass(false, ["1", "9"]),
      exactly(2)(digit),
    ),
    sequence(
      exactString("65061"),
      characterClass(false, ["1", "9"]),
      exactly(2)(digit),
    ),
    sequence(
      exactString("6506"),
      characterClass(false, ["2", "9"]),
      exactly(3)(digit),
    ),
    sequence(
      exactString("650"),
      characterClass(false, ["7", "9"]),
      exactly(4)(digit),
    ),
    sequence(
      exactString("65"),
      characterClass(false, ["1", "9"]),
      exactly(5)(digit),
    ),
  ),
  subgroup,
  then(between(8, 11)(digit)),
);

const jcb = pipe(
  sequence(
    exactString("352"),
    characterClass(false, "8", "9"),
    exactly(4)(digit),
  ),
  or(
    sequence(
      exactString("35"),
      characterClass(false, ["3", "8"]),
      exactly(5)(digit),
    ),
  ),
  subgroup,
  then(between(8, 11)(digit)),
);

const rupay = subgroup(
  oneOf(
    sequence(
      subgroup(
        oneOf(
          exactString("60"),
          exactString("65"),
          exactString("81"),
          exactString("82"),
        ),
      ),
      exactly(14)(digit),
    ),
    sequence(exactString("508"), exactly(14)(digit)),
  ),
);

const unionPay = sequence(
  exactString("62"),
  subgroup(
    oneOf(
      sequence(
        char("2"),
        subgroup(
          oneOf(
            sequence(
              exactString("12"),
              characterClass(false, ["6", "9"]),
              exactly(2)(digit),
            ),
            sequence(
              char("1"),
              characterClass(false, ["3", "9"]),
              exactly(3)(digit),
            ),
            sequence(characterClass(false, ["2", "8"]), digit),
            sequence(
              exactString("9"),
              characterClass(false, "0", "1"),
              exactly(3)(digit),
            ),
            sequence(
              exactString("92"),
              characterClass(false, ["0", "5"]),
              exactly(2)(digit),
            ),
          ),
        ),
      ),
      sequence(characterClass(false, ["4", "6"]), exactly(5)(digit)),
      sequence(
        exactString("8"),
        characterClass(false, ["2", "8"]),
        exactly(4)(digit),
      ),
    ),
  ),
  between(8, 11)(digit),
);

const creditCard = oneOf(
  visa,
  mastercard,
  amex,
  dinersClub,
  discover,
  jcb,
  rupay,
  unionPay,
);

// ---------------------------------------------------------------------------
// src/patterns/email-address.ts
// ---------------------------------------------------------------------------

// Quoted local part: one or more characters other than '"' and codes 0-31.
const localPartQuoted = pipe(
  char('"'),
  then(atLeastOne({ greedy: true })(characterClass(true, '"', [0, 31]))),
  then(char('"')),
);

const localPartUnquotedAllowedCharacters = characterClass(
  false,
  ["A", "Z"],
  ["a", "z"],
  ["0", "9"],
  "!",
  "#",
  "$",
  "%",
  "&",
  "'",
  "*",
  "+",
  "-",
  "/",
  "=",
  "?",
  "^",
  "_",
  "`",
  "{",
  "|",
  "}",
  "~",
);

// Unquoted local part: dot-separated runs of allowed characters.
const localPartUnquoted = pipe(
  atLeastOne({ greedy: true })(localPartUnquotedAllowedCharacters),
  then(
    pipe(
      char("."),
      then(atLeastOne({ greedy: true })(localPartUnquotedAllowedCharacters)),
      subgroup,
      anyNumber({ greedy: true }),
    ),
  ),
);

const localPart = pipe(localPartUnquoted, or(localPartQuoted), subgroup);

const ipAddressByte = between(1, 3)(digit);

const domainIpAddress = sequence(
  char("["),
  ipAddressByte,
  char("."),
  ipAddressByte,
  char("."),
  ipAddressByte,
  char("."),
  ipAddressByte,
  char("]"),
);

// One or more "label." groups followed by an alphabetic suffix of 2+ letters.
const domainName = pipe(
  alnum,
  and("-"),
  atMost(63),
  then(char(".")),
  subgroup,
  atLeastOne({ greedy: true }),
  then(atLeast(2)(alpha)),
);

const domain = pipe(domainIpAddress, or(domainName), subgroup);

const emailAddress = pipe(localPart, then(char("@")), then(domain));

// ---------------------------------------------------------------------------
// src/patterns/hex-color.ts
// ---------------------------------------------------------------------------

// Optional '#', then 3, 4, 6 or 8 hex digits.
const hexColor = pipe(
  maybe(char("#")),
  then(
    subgroup(
      pipe(
        between(3, 4)(hexDigit),
        or(exactly(6)(hexDigit)),
        or(exactly(8)(hexDigit)),
      ),
    ),
  ),
);

// ---------------------------------------------------------------------------
// src/patterns/hexadecimal.ts
// ---------------------------------------------------------------------------

// Optional 0x / 0X / 0h / 0H prefix, then one or more hex digits.
const hexadecimal = pipe(
  exactString("0x"),
  or(exactString("0X")),
  or(exactString("0h")),
  or(exactString("0H")),
  subgroup,
  maybe,
  then(atLeastOne()(xdigit)),
);

// ---------------------------------------------------------------------------
// src/patterns/hsl-color.ts
// ---------------------------------------------------------------------------

const anyDecimal = subgroup(
  sequence(char("."), atLeastOne({ greedy: true })(digit)),
);

const zeroDecimal = subgroup(
  sequence(char("."), atLeastOne({ greedy: true })(char("0"))),
);

const exponential = subgroup(
  sequence(
    char("e"),
    maybe(subgroup(oneOf(char("+"), char("-")))),
    atLeastOne({ greedy: true })(digit),
  ),
);

const hue = subgroup(
  sequence(
    maybe(subgroup(oneOf(char("+"), char("-")))),
    subgroup(
      oneOf(
        pipe(atLeastOne({ greedy: true })(digit), then(maybe(anyDecimal))),
        anyDecimal,
      ),
    ),
    maybe(exponential),
    maybe(
      subgroup(
        oneOf(
          exactString("deg"),
          exactString("grad"),
          exactString("rad"),
          exactString("turn"),
        ),
      ),
    ),
  ),
);

const percentage = subgroup(
  sequence(
    maybe(char("+")),
    anyNumber({ greedy: true })(char("0")),
    subgroup(
      oneOf(
        pipe(exactString("100"), then(maybe(zeroDecimal))),
        pipe(subgroup(integerRange(0, 99)), then(maybe(anyDecimal))),
        anyDecimal,
      ),
    ),
    maybe(exponential),
    char("%"),
  ),
);

// Alpha channel value (named alpha2 because `alpha` is the letter class).
const alpha2 = subgroup(
  sequence(
    anyNumber({ greedy: true })(digit),
    subgroup(oneOf(digit, anyDecimal)),
    maybe(exponential),
    maybe(char("%")),
  ),
);

const anySpace = anyNumber({ greedy: true })(blank);
const commaDelimiter = subgroup(sequence(anySpace, char(","), anySpace));
const slashDelimiter = subgroup(sequence(anySpace, char("/"), anySpace));

// hsl(...) / hsla(...) in either comma-separated or space-separated form.
const hslColor = sequence(
  exactString("hsl"),
  maybe(char("a")),
  char("("),
  anySpace,
  hue,
  subgroup(
    oneOf(
      sequence(
        commaDelimiter,
        percentage,
        commaDelimiter,
        percentage,
        maybe(subgroup(sequence(commaDelimiter, alpha2))),
      ),
      sequence(
        anySpace,
        percentage,
        anySpace,
        percentage,
        maybe(subgroup(sequence(slashDelimiter, alpha2))),
      ),
    ),
  ),
  anySpace,
  char(")"),
);

// ---------------------------------------------------------------------------
// src/patterns/jwt.ts
// ---------------------------------------------------------------------------

// Two dot-separated base64url segments with an optional third.
const jwt = sequence(
  subgroup(base64Url),
  char("."),
  subgroup(base64Url),
  pipe(char("."), then(subgroup(base64Url)), subgroup, maybe),
);

// ---------------------------------------------------------------------------
// src/patterns/lat-long.ts
// ---------------------------------------------------------------------------

// Latitude: optional sign, then 90(.0+)? or 0-89 with optional fraction.
const latPattern = pipe(
  maybe(characterClass(false, "+", "-")),
  then(
    subgroup(
      oneOf(
        sequence(
          char("9"),
          char("0"),
          maybe(
            subgroup(
              pipe(char("."), then(atLeastOne({ greedy: true })(char("0")))),
            ),
          ),
        ),
        pipe(
          integerRange(0, 89),
          subgroup,
          then(
            maybe(
              subgroup(
                pipe(char("."), then(atLeastOne({ greedy: true })(digit))),
              ),
            ),
          ),
        ),
      ),
    ),
  ),
);

// Longitude: optional sign, then 180(.0+)? or 0-179 with optional fraction.
const longPattern = pipe(
  maybe(characterClass(false, "+", "-")),
  then(
    subgroup(
      oneOf(
        sequence(
          char("1"),
          char("8"),
          char("0"),
          maybe(
            subgroup(
              pipe(char("."), then(atLeastOne({ greedy: true })(char("0")))),
            ),
          ),
        ),
        pipe(
          integerRange(0, 179),
          subgroup,
          then(
            maybe(
              subgroup(
                pipe(char("."), then(atLeastOne({ greedy: true })(digit))),
              ),
            ),
          ),
        ),
      ),
    ),
  ),
);

// "lat,long" with optional whitespace after the comma, bare or parenthesised.
const latLong = oneOf(
  pipe(
    latPattern,
    then(char(",")),
    then(anyNumber({ greedy: true })(space)),
    then(longPattern),
  ),
  pipe(
    char("("),
    then(latPattern),
    then(char(",")),
    then(anyNumber({ greedy: true })(space)),
    then(longPattern),
    then(char(")")),
  ),
);

// ---------------------------------------------------------------------------
// src/patterns/rgb-color.ts
// ---------------------------------------------------------------------------

// Alpha component shared by both rgba(...) forms: 0, 1, 1.0, or [0].digits
const rgbAlphaValue = subgroup(
  oneOf(
    char("0"),
    char("1"),
    exactString("1.0"),
    sequence(maybe(char("0")), char("."), atLeastOne({ greedy: true })(digit)),
  ),
);

const rgbColorDecimal = sequence(
  exactString("rgb("),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(")"),
);

const rgbColorWithAlphaDecimal = sequence(
  exactString("rgba("),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(","),
  subgroup(integerRange(0, 255)),
  char(","),
  rgbAlphaValue,
  char(")"),
);

const rgbColorPercent = sequence(
  exactString("rgb("),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%)"),
);

const rgbColorWithAlphaPercent = sequence(
  exactString("rgba("),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  subgroup(integerRange(0, 100)),
  exactString("%,"),
  rgbAlphaValue,
  char(")"),
);

const rgbColor = oneOf(
  rgbColorDecimal,
  rgbColorWithAlphaDecimal,
  rgbColorPercent,
  rgbColorWithAlphaPercent,
);

// ---------------------------------------------------------------------------
// src/patterns/uuid.ts
// ---------------------------------------------------------------------------

/** Exactly `n` hexadecimal digits. */
const nHexDigits = (n) => exactly(n)(hexDigit);

/** 8-4-4-4-12 UUID whose third group starts with the given version digit. */
const uuidWithVersion = (version) =>
  sequence(
    nHexDigits(8),
    char("-"),
    nHexDigits(4),
    char("-"),
    char(version),
    nHexDigits(3),
    char("-"),
    nHexDigits(4),
    char("-"),
    nHexDigits(12),
  );

/**
 * Like `uuidWithVersion`, but the fourth group must additionally start
 * with one of 8, 9, A/a or B/b.
 */
const uuidWithVersionAndVariant = (version) =>
  sequence(
    nHexDigits(8),
    char("-"),
    nHexDigits(4),
    char("-"),
    char(version),
    nHexDigits(3),
    char("-"),
    characterClass(false, "A", "a", "B", "b", "8", "9"),
    nHexDigits(3),
    char("-"),
    nHexDigits(12),
  );

const uuidV1 = uuidWithVersion("1");
const uuidV2 = uuidWithVersion("2");
const uuidV3 = uuidWithVersion("3");
const uuidV4 = uuidWithVersionAndVariant("4");
const uuidV5 = uuidWithVersionAndVariant("5");

const anyUUID = sequence(
  nHexDigits(8),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(4),
  char("-"),
  nHexDigits(12),
);

export {
  alnum,
  alpha,
  and,
  anyNumber,
  anything,
  atLeast,
  atLeastOne,
  atMost,
  between,
  blank,
  char,
  characterClass,
  digit,
  empty,
  exactString,
  exactly,
  graph,
  hexDigit,
  integerRange,
  lower,
  maybe,
  non,
  oneOf,
  or,
  patterns_exports as patterns,
  print,
  punct,
  regexFromPattern,
  sequence,
  space,
  subgroup,
  then,
  times,
  upper,
  word,
  xdigit,
};