| | 1 | | @using System.IO |
| | 2 | | @using System.Text |
| | 3 | | @using System.Text.RegularExpressions |
| | 4 | |
|
| | 5 | | <div class="container"> |
| | 6 | | <div class="row"> |
| | 7 | | <div class="col"> |
| | 8 | | <p><a href="https://www.soscisurvey.de/tools/view-chars.php" target="_blank">View non-printable unicode char |
| | 9 | | <p><a href="https://github.com/BurninLeo/see-non-printable-characters/blob/main/view-chars.php"target="_blan |
| | 10 | | </div> |
| | 11 | | </div> |
| | 12 | |
|
| | 13 | | <div class="row"> |
| | 14 | | <div class="col"> |
| | 15 | | <h2>View non-printable unicode characters</h2> |
| | 16 | | <p>Online tool to display non-printable characters that may be hidden in copy&pasted strings.</p> |
| | 17 | | </div> |
| | 18 | | </div> |
| | 19 | | <div class="row"> |
| | 20 | | <div class="col"> |
| | 21 | | <label for="s">Please paste the string here:</label> |
| | 22 | | <textarea id="s" name="s" class="form-control" rows="8" cols="40" |
| | 23 | | @bind="s" style="width: 100%; box-sizing: border-box;"></textarea> |
| | 24 | | </div> |
| | 25 | | </div> |
| | 26 | | <div class="row"> |
| | 27 | | <div class="col"> |
| | 28 | | <button id="btnEncode" name="btnEncode" class="btn btn-success float-right" @onclick="Encode"> Show me the c |
| | 29 | | </div> |
| | 30 | | </div> |
| | 31 | |
|
| | 32 | | <div class="row"> |
| | 33 | | <div class="col"> |
| | 34 | | @* <div class="output"> |
| | 35 | | @((MarkupString)output) |
| | 36 | | </div> *@ |
| | 37 | |
|
| | 38 | | @((MarkupString)output) |
| | 39 | |
|
| | 40 | | <p># characters, # bytes</p> |
| | 41 | | </div> |
| | 42 | | </div> |
| | 43 | |
|
| | 44 | | <div class="row"> |
| | 45 | | <div class="col"> |
| | 46 | | <div class="output2"> |
| | 47 | | <span class="S2Tooltip anchor">S</span> |
| | 48 | | <span class="S2Tooltip container"> |
| | 49 | | <span class="S2Tooltip tiptext rounded shadow">83<br>0x53</span> |
| | 50 | | </span> |
| | 51 | | <span class="hex S2Tooltip anchor">U+A0</span> |
| | 52 | | <span class="S2Tooltip container"> |
| | 53 | | <span class="S2Tooltip tiptext rounded shadow">&#160;<br>\u00A0</span> |
| | 54 | | </span> |
| | 55 | | </div> |
| | 56 | | </div> |
| | 57 | | </div> |
| | 58 | |
|
| | 59 | | <div class="row"> |
| | 60 | | <div class="col"> |
| | 61 | |
|
| | 62 | | <h2>Helpful Sites for Details on UTF Characters</h2> |
| | 63 | | <ul> |
| | 64 | | <li><a href="https://www.branah.com/unicode-converter" target="_blank">Branah.com Unicode Converter</a>< |
| | 65 | | <li><a href="http://www.fileformat.info/info/unicode/char/search.htm" target="_blank">FileFormat.Info</a |
| | 66 | | <li><a href="http://utf8-chartable.de/unicode-utf8-table.pl" target="_blank">utf8-chartable.de</a></li> |
| | 67 | | </ul> |
| | 68 | |
|
| | 69 | | </div> |
| | 70 | | </div> |
| | 71 | |
|
| | 72 | | </div> |
| | 73 | |
|
| | 74 | | @code { |
// Text bound to the <textarea id="s"> through @bind; this is the input that Encode() converts.
private string s = string.Empty;

// HTML fragment that the markup injects into the page via (MarkupString)output.
private string output = string.Empty;

/// <summary>
/// Seeds the component with a sample string and renders its character breakdown.
/// </summary>
/// <remarks>
/// The method is intentionally not <c>async</c>: nothing is awaited, so a completed
/// task is returned directly. The sample goes through <c>text2html</c> (the same path
/// the Encode button uses) so that every character of the sample gets its own
/// anchor/tooltip instead of the whole string being described as one character.
/// </remarks>
/// <returns>An already completed task.</returns>
protected override Task OnInitializedAsync()
{
    s = "a … ⟶b";
    output = text2html(s);
    Console.WriteLine($"Conversion: {output}");
    return Task.CompletedTask;
}
| | 90 | |
|
/// <summary>
/// Click handler of the "btnEncode" button: converts the current input text into
/// the annotated HTML shown below the form.
/// </summary>
private void Encode()
{
    // A null input is treated as empty so text2html never dereferences null.
    output = text2html(s ?? string.Empty);
}
| | 99 | |
|
/// <summary>
/// Resets the input text to an empty string.
/// </summary>
private void Decode() => s = string.Empty;
| | 104 | |
|
/// <summary>
/// Maps a whitespace character to the HTML span that makes it visible.
/// </summary>
/// <param name="c">The character to look up, passed as a string.</param>
/// <returns>
/// The marker span for carriage return, line feed, tab or space; an empty string
/// for anything else.
/// </returns>
private string parseWhitespace(string c) => c switch
{
    "\r" => "<span class='symbol S2Tooltip anchor'>CR</span>",
    "\n" => "<span class='symbol S2Tooltip anchor'>LF</span>",
    // NOTE: the tab and space literals end with an invisible character after </span>
    // (appears to be U+200B ZERO WIDTH SPACE); it is part of the emitted markup.
    "\t" => "<span class='symbol S2Tooltip anchor'>⟶</span>​",
    " " => "<span class='white S2Tooltip anchor'>·</span>​",
    _ => "",
};
| | 126 | |
|
/// <summary>
/// Tells whether <paramref name="c"/> contains at least one character from the
/// Unicode letter (L), mark (M), number (N), punctuation (P) or symbol (S)
/// categories, i.e. a character that is not in the separator or other groups.
/// </summary>
/// <param name="c">The text to inspect.</param>
/// <returns><c>true</c> when at least one such character is found.</returns>
private bool isRegularUnicodeCharacter(string c)
{
    Console.WriteLine($"isRegularUnicodeCharacter Character: {c}");

    // Count every occurrence; the count is also written to the console below.
    int hits = Regex.Matches(c, @"[\p{L}\p{M}\p{N}\p{P}\p{S}]").Count;

    Console.WriteLine($"isRegularUnicodeCharacter Character: {hits}");

    return hits != 0;
}
| | 142 | |
|
/// <summary>
/// Builds the HTML for a single character: an anchor span showing the character
/// (or a visible stand-in) followed by a tooltip container with its numeric value.
/// </summary>
/// <remarks>
/// For a character that encodes to one UTF-8 byte the tooltip holds the decimal and
/// hexadecimal value. For every other character the tooltip holds the numeric
/// character reference <c>&amp;#N;</c> and the stand-in text is <c>\uXXXX</c>, both
/// derived from the Unicode code point (not from the UTF-8 byte values, and
/// independent of the machine's endianness).
/// </remarks>
/// <param name="c">
/// The character to describe, as a string: one UTF-16 code unit or a surrogate pair.
/// If more text is passed, only the first character determines the numeric values.
/// </param>
/// <returns>The anchor and tooltip markup, or an empty string for null/empty input.</returns>
private string htmlChar(string c)
{
    // Nothing to describe; avoids emitting a bogus "&#0;" / "\u" entry.
    if (string.IsNullOrEmpty(c))
    {
        return string.Empty;
    }

    Console.WriteLine($"Length: {c.Length}");
    var byteCount = Encoding.UTF8.GetByteCount(c);
    Console.WriteLine($"Byte Length: {byteCount}");

    // A well-formed surrogate pair is decoded to its scalar value; anything else
    // (including an unpaired surrogate) is described by its first UTF-16 code unit.
    int codePoint = char.IsSurrogatePair(c, 0) ? char.ConvertToUtf32(c, 0) : c[0];

    string desc;
    string hex;
    if (byteCount == 1)
    {
        desc = $"{codePoint}<br>\r\n0x{codePoint:X2}";
        hex = $"{codePoint:X2}";
    }
    else
    {
        desc = $"&#{codePoint};<br>";
        hex = $"\\u{codePoint:X4}"; // e.g. \u00A0
    }

    Console.WriteLine($"CHAR: {c}");
    Console.WriteLine($"DESC: {desc}");
    Console.WriteLine($"HEX: {hex}");

    var symbol = parseWhitespace(c);
    // An empty result means the character is not one of the marked whitespace characters.
    if (symbol == String.Empty)
    {
        // The character itself goes into markup rendered as raw HTML, so it must be
        // encoded ('<', '&', quotes); the hex stand-in contains no markup characters.
        symbol = isRegularUnicodeCharacter(c)
            ? "<span class='S2Tooltip anchor'>" + System.Net.WebUtility.HtmlEncode(c) + "</span>"
            : "<span class='hex S2Tooltip anchor'>" + hex + "</span>";
    }

    return symbol +
        "<span class='S2Tooltip container'>" +
        "<span class='S2Tooltip tiptext rounded shadow'>" + desc + "</span>" +
        "</span>";
}
| | 202 | |
|
/// <summary>
/// Converts a string into the annotated HTML block: one <c>htmlChar</c> entry per
/// character, wrapped in a <c>div</c> with class "output".
/// </summary>
/// <remarks>
/// Line breaks are tracked so that a CR+LF or LF+CR pair produces a single
/// <c>&lt;br&gt;</c> after the pair, while a lone or repeated CR/LF produces one
/// <c>&lt;br&gt;</c> per newline character. Surrogate pairs are handed to
/// <c>htmlChar</c> as one unit so characters outside the BMP are not split in half.
/// </remarks>
/// <param name="s">The text to convert; null is treated like an empty string.</param>
/// <returns>The HTML fragment, always including the opening and closing <c>div</c>.</returns>
private string text2html(string s)
{
    const string LineBreak = "<br>" + "\r\n";

    var html = new StringBuilder("<div class='output'>" + "\r\n");

    if (!string.IsNullOrEmpty(s))
    {
        // Pending newline state: 0 = none, 1 = last newline char was CR, 2 = it was LF.
        var nlc = 0;

        for (var i = 0; i < s.Length; i++)
        {
            // Keep a high/low surrogate pair together as one character.
            var width = char.IsSurrogatePair(s, i) ? 2 : 1;
            var c = s.Substring(i, width);
            i += width - 1;

            if (c == "\r" || c == "\n")
            {
                var own = c == "\r" ? 1 : 2;

                if (nlc == 0)
                {
                    // First newline character: show it and wait to see what follows.
                    html.Append(htmlChar(c));
                    nlc = own;
                }
                else if (nlc == own)
                {
                    // Same newline character again: the previous one was a line of its own.
                    html.Append(LineBreak).Append(htmlChar(c));
                    nlc = own;
                }
                else
                {
                    // The other newline character: completes a CR+LF / LF+CR pair.
                    html.Append(htmlChar(c)).Append(LineBreak);
                    nlc = 0;
                }
            }
            else
            {
                // A lone CR or LF is still pending: break the line before continuing,
                // otherwise the <br> would only appear in front of the next newline.
                if (nlc != 0)
                {
                    html.Append(LineBreak);
                    nlc = 0;
                }

                html.Append(htmlChar(c));
            }
        }
    }

    return html.Append("</div>" + "\r\n").ToString();
}
| | 267 | | } |