| | | 1 | | /******************************************************************************** |
| | | 2 | | * UrlUtils.cs * |
| | | 3 | | * * |
| | | 4 | | * Author: Denes Solti * |
| | | 5 | | ********************************************************************************/ |
| | | 6 | | using System; |
| | | 7 | | using System.Text; |
| | | 8 | | |
| | | 9 | | namespace NanoRoute.Internals |
| | | 10 | | { |
| | | 11 | | using Properties; |
| | | 12 | | |
/// <summary>
/// Selects which decoding rules <c>UrlUtils</c> applies to a percent-encoded input.
/// </summary>
internal enum UrlDecodeMode
{
    /// <summary>
    /// Path-style decoding: only <c>%XX</c> escapes are decoded; a <c>'+'</c> is copied through unchanged.
    /// </summary>
    Path = 0,

    /// <summary>
    /// Form-style decoding: <c>%XX</c> escapes are decoded and a <c>'+'</c> is turned into a space.
    /// </summary>
    Form = 1
}
| | | 18 | | |
/// <summary>
/// Helpers for decoding percent-encoded (URL-escaped) text into UTF-16 characters.
/// </summary>
internal static class UrlUtils
{
    // Strict UTF-8 decoder: no BOM emission and, more importantly, it throws a
    // DecoderFallbackException on invalid byte sequences instead of substituting U+FFFD.
    private static readonly Encoding s_utf8 = new UTF8Encoding(false, true);

    /// <summary>
    /// Decodes <paramref name="source"/> into a newly allocated buffer.
    /// </summary>
    /// <param name="source">The percent-encoded input.</param>
    /// <param name="mode">Decoding rules to apply (see <see cref="UrlDecodeMode"/>).</param>
    /// <returns>The decoded characters, backed by a fresh array.</returns>
    /// <exception cref="InvalidOperationException">The input contains a malformed escape or an invalid UTF-8 sequence.</exception>
    public static ReadOnlyMemory<char> DecodeUrl(ReadOnlyMemory<char> source, UrlDecodeMode mode)
    {
        // Decoding never grows the text: a literal or '+' maps 1:1 and every "%XX" triple
        // contributes at most one char, so a buffer of the input's length always suffices.
        char[] result = new char[source.Length];

        if (!TryDecodeUrl(source.Span, result.AsSpan(), mode, out int charsWritten))
            throw new InvalidOperationException(Resources.ERR_DECODING_FAILED);

        return result.AsMemory(0, charsWritten);
    }

    /// <summary>
    /// Decodes <paramref name="source"/> into the caller supplied <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">The percent-encoded input.</param>
    /// <param name="destination">Buffer receiving the decoded characters.</param>
    /// <param name="mode">Decoding rules to apply (see <see cref="UrlDecodeMode"/>).</param>
    /// <param name="charsWritten">Number of characters stored in <paramref name="destination"/>. It is not reset on failure, so it then reflects the partial output.</param>
    /// <returns><c>false</c> when the input is malformed or <paramref name="destination"/> is too small, <c>true</c> otherwise.</returns>
    public static bool TryDecodeUrl(ReadOnlySpan<char> source, Span<char> destination, UrlDecodeMode mode, out int charsWritten)
    {
        charsWritten = 0;

        // No increment clause: each branch advances "i" by the number of input chars it consumed.
        for (int i = 0; i < source.Length;)
        {
            // There is input left but no room for even a single output char.
            if (charsWritten >= destination.Length)
                return false;

            switch (source[i])
            {
                case '+' when mode is UrlDecodeMode.Form:
                    destination[charsWritten++] = ' ';
                    i++;
                    break;

                case '%':
                    if (!TryDecodeUtf8Sequence(source, ref i, destination, ref charsWritten))
                        return false;
                    break;

                default:
                    if (!TryCopyLiteralSegment(source, mode, ref i, destination, ref charsWritten))
                        return false;
                    break;
            }
        }

        return true;
    }

    /// <summary>
    /// Copies the run of characters starting at <paramref name="offset"/> up to (but excluding) the next
    /// character that needs decoding, then advances <paramref name="offset"/> and <paramref name="charsWritten"/>.
    /// </summary>
    /// <returns><c>false</c> when the run does not fit into <paramref name="destination"/>.</returns>
    private static bool TryCopyLiteralSegment(ReadOnlySpan<char> source, UrlDecodeMode mode, ref int offset, Span<char> destination, ref int charsWritten)
    {
        ReadOnlySpan<char> segment = source.Slice(offset);

        // '+' is only special in form mode, in path mode it belongs to the literal run.
        int special = mode is UrlDecodeMode.Form ? segment.IndexOfAny('%', '+') : segment.IndexOf('%');

        // -1 means "no special char": the rest of the input is literal. TryDecodeUrl() only gets here
        // when the current char is not special, so the run is at least one char long and the
        // caller's loop always makes progress.
        if (special > 0)
            segment = segment.Slice(0, special);

        if (charsWritten + segment.Length > destination.Length)
            return false;

        segment.CopyTo(destination.Slice(charsWritten));
        charsWritten += segment.Length;
        offset += segment.Length;

        return true;
    }

    /// <summary>
    /// Decodes one complete UTF-8 sequence given as consecutive <c>%XX</c> escapes starting at
    /// <paramref name="offset"/> and appends the resulting UTF-16 char(s) to <paramref name="destination"/>.
    /// </summary>
    /// <returns>
    /// <c>false</c> when an escape is malformed or missing, the bytes are not valid UTF-8, or the result does
    /// not fit into <paramref name="destination"/>. Neither <paramref name="offset"/> nor
    /// <paramref name="charsWritten"/> is changed in that case.
    /// </returns>
    private static bool TryDecodeUtf8Sequence(ReadOnlySpan<char> source, ref int offset, Span<char> destination, ref int charsWritten)
    {
        const int ESCAPED_BYTE_LENGTH = 3; // "%XX"
#if NETSTANDARD2_1_OR_GREATER
        Span<byte> bytes = stackalloc byte[4];
#else
        byte[] bytes = new byte[4];
#endif
        // The first escaped byte determines how many %XX chunks belong to this UTF-8 sequence.
        if (!TryParseEscapedByte(source, offset, out bytes[0]))
            return false;

        int byteCount = GetUtf8ByteCount(bytes[0]);
        if (byteCount < 0)
            return false;

        // Collect the remaining escaped bytes, then let the strict UTF-8 decoder validate them.
        for (int i = 1; i < byteCount; i++)
            if (!TryParseEscapedByte(source, offset + i * ESCAPED_BYTE_LENGTH, out bytes[i]))
                return false;

        // A single code point needs at most two UTF-16 code units (a surrogate pair).
#if NETSTANDARD2_1_OR_GREATER
        Span<char> chars = stackalloc char[2];
#else
        char[] chars = new char[2];
#endif
        int decodedChars;

        try
        {
#if NETSTANDARD2_1_OR_GREATER
            decodedChars = s_utf8.GetChars(bytes.Slice(0, byteCount), chars);
#else
            decodedChars = s_utf8.GetChars(bytes, 0, byteCount, chars, 0);
#endif
        }
        catch (DecoderFallbackException)
        {
            return false;
        }

        if (charsWritten + decodedChars > destination.Length)
            return false;

        // Copy only the chars that were actually produced. Copying the whole two element scratch
        // buffer would throw when exactly one slot is left in the destination and would otherwise
        // leave a stray '\0' behind the reported length. Going through Span<char> works for both
        // the stackalloc'd and the array backed scratch buffer, so no conditional compilation is needed.
        Span<char> decoded = chars;
        decoded.Slice(0, decodedChars).CopyTo(destination.Slice(charsWritten));

        charsWritten += decodedChars;
        offset += byteCount * ESCAPED_BYTE_LENGTH;

        return true;
    }

    /// <summary>
    /// Parses the <c>%XX</c> escape located at <paramref name="offset"/>.
    /// </summary>
    /// <returns><c>false</c> (with <paramref name="value"/> set to 0) when the escape is out of range, does not start with '%' or has non-hex digits.</returns>
    private static bool TryParseEscapedByte(ReadOnlySpan<char> source, int offset, out byte value)
    {
        value = default;

        // "offset + 2" is the index of the second hex digit, it must still be inside the input.
        return offset >= 0 &&
            offset + 2 < source.Length &&
            source[offset] is '%' &&
            TryParseHex2(source[offset + 1], source[offset + 2], out value);
    }

    /// <summary>
    /// Returns the total length (in bytes) of the UTF-8 sequence introduced by <paramref name="first"/>,
    /// or -1 when the byte cannot start a sequence (continuation bytes 0x80-0xBF, the overlong
    /// lead bytes 0xC0/0xC1 and everything above 0xF4).
    /// </summary>
    private static int GetUtf8ByteCount(byte first) => first switch
    {
        <= 0x7F => 1,
        >= 0xC2 and <= 0xDF => 2,
        >= 0xE0 and <= 0xEF => 3,
        >= 0xF0 and <= 0xF4 => 4,
        _ => -1
    };

    /// <summary>
    /// Combines two hexadecimal digits (either case) into a byte.
    /// </summary>
    /// <returns><c>false</c> (with <paramref name="value"/> set to 0) when either char is not a hex digit.</returns>
    private static bool TryParseHex2(char high, char low, out byte value)
    {
        value = default;

        int
            hi = HexToInt(high),
            lo = HexToInt(low);

        if (hi < 0 || lo < 0)
            return false;

        value = (byte) ((hi << 4) | lo);
        return true;

        // Maps a single hex digit to its numeric value, -1 for anything else.
        static int HexToInt(char c) => c switch
        {
            >= '0' and <= '9' => c - '0',
            >= 'a' and <= 'f' => c - 'a' + 10,
            >= 'A' and <= 'F' => c - 'A' + 10,
            _ => -1
        };
    }
}
| | | 179 | | } |
| | | 180 | | |