Bug Summary

File:builds/wireshark/wireshark/wsutil/str_util.c
Warning:line 1212, column 9
Value stored to 'printable_bytes' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name str_util.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -ffloat16-excess-precision=standard -fbfloat16-excess-precision=standard -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/builds/wireshark/wireshark/build -fcoverage-compilation-dir=/builds/wireshark/wireshark/build -resource-dir /usr/lib/llvm-22/lib/clang/22 -isystem /usr/include/glib-2.0 -isystem /usr/lib/x86_64-linux-gnu/glib-2.0/include -D BUILD_WSUTIL -D CARES_NO_DEPRECATED -D G_DISABLE_DEPRECATED -D G_DISABLE_SINGLE_INCLUDES -D WS_BUILD_DLL -D WS_DEBUG -D WS_DEBUG_UTF_8 -D wsutil_EXPORTS -I /builds/wireshark/wireshark/build -I /builds/wireshark/wireshark -I /builds/wireshark/wireshark/include -I /builds/wireshark/wireshark/build/wsutil -D _GLIBCXX_ASSERTIONS -internal-isystem /usr/lib/llvm-22/lib/clang/22/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/16/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/builds/wireshark/wireshark/= 
-fmacro-prefix-map=/builds/wireshark/wireshark/build/= -fmacro-prefix-map=../= -Wno-format-nonliteral -std=gnu17 -ferror-limit 19 -fvisibility=hidden -fwrapv -fwrapv-pointer -fstrict-flex-arrays=3 -stack-protector 2 -fstack-clash-protection -fcf-protection=full -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fexceptions -fcolor-diagnostics -analyzer-output=html -faddrsig -fdwarf2-cfi-asm -o /builds/wireshark/wireshark/sbout/2026-06-22-100421-3596-1 -x c /builds/wireshark/wireshark/wsutil/str_util.c
1/* str_util.c
2 * String utility routines
3 *
4 * Wireshark - Network traffic analyzer
5 * By Gerald Combs <gerald@wireshark.org>
6 * Copyright 1998 Gerald Combs
7 *
8 * SPDX-License-Identifier: GPL-2.0-or-later
9 */
10
11#define _GNU_SOURCE
12#include "config.h"
13#include "str_util.h"
14
15#include <string.h>
16#include <locale.h>
17#include <math.h>
18
19#include <ws_codepoints.h>
20
21#include <wsutil/to_str.h>
22
23
/* Describes one table of unit prefixes and how to step between its entries. */
struct prefix_parameters {
    /** Array of prefixes to represent unit multiplication factors. */
    const char * const *prefix;
    /** Number of elements in the prefix array. */
    int prefix_count;
    /** Multiplication factor between prefixes. */
    int power;
    /** Index of element within the prefix array for "no prefix". */
    int prefix_offset;
};
30
/* Upper-case hexadecimal digit characters, indexed by nibble value 0-15. */
static const char hex[16] = {
    '0', '1', '2', '3',
    '4', '5', '6', '7',
    '8', '9', 'A', 'B',
    'C', 'D', 'E', 'F'
};
33
34/* Given a "flags" value passed into a formatting function, determine which
35 * formatting parameters should apply.
36 */
37static const struct prefix_parameters *
38prefix_parameters_for_flags(uint16_t flags) {
39 static const char * const si_prefixes[] = {" a", " f", " p", " n", " μ", " m", " ", " k", " M", " G", " T", " P", " E"};
40 static const struct prefix_parameters si_parameters = {si_prefixes, G_N_ELEMENTS(si_prefixes)(sizeof (si_prefixes) / sizeof ((si_prefixes)[0])), 1000, 6};
41 static const char * const iec_prefixes[] = {" ", " Ki", " Mi", " Gi", " Ti", " Pi", " Ei"};
42 static const struct prefix_parameters iec_parameters = {iec_prefixes, G_N_ELEMENTS(iec_prefixes)(sizeof (iec_prefixes) / sizeof ((iec_prefixes)[0])), 1024, 0};
43
44 return (flags & FORMAT_SIZE_PREFIX_IEC(1 << 1)) != 0 ? &iec_parameters : &si_parameters;
45}
46
47char *
48wmem_strconcat(wmem_allocator_t *allocator, const char *first, ...)
49{
50 size_t len;
51 va_list args;
52 char *s;
53 char *concat;
54 char *ptr;
55
56 if (!first)
57 return NULL((void*)0);
58
59 len = 1 + strlen(first);
60 va_start(args, first)__builtin_va_start(args, first);
61 while ((s = va_arg(args, char*)__builtin_va_arg(args, char*))) {
62 len += strlen(s);
63 }
64 va_end(args)__builtin_va_end(args);
65
66 ptr = concat = (char *)wmem_alloc(allocator, len);
67
68 ptr = g_stpcpy(ptr, first);
69 va_start(args, first)__builtin_va_start(args, first);
70 while ((s = va_arg(args, char*)__builtin_va_arg(args, char*))) {
71 ptr = g_stpcpy(ptr, s);
72 }
73 va_end(args)__builtin_va_end(args);
74
75 return concat;
76}
77
78char *
79wmem_strjoin(wmem_allocator_t *allocator,
80 const char *separator, const char *first, ...)
81{
82 size_t len;
83 va_list args;
84 size_t separator_len;
85 char *s;
86 char *concat;
87 char *ptr;
88
89 if (!first)
90 return NULL((void*)0);
91
92 if (separator == NULL((void*)0)) {
93 separator = "";
94 }
95
96 separator_len = strlen (separator);
97
98 len = 1 + strlen(first); /* + 1 for null byte */
99 va_start(args, first)__builtin_va_start(args, first);
100 while ((s = va_arg(args, char*)__builtin_va_arg(args, char*))) {
101 len += (separator_len + strlen(s));
102 }
103 va_end(args)__builtin_va_end(args);
104
105 ptr = concat = (char *)wmem_alloc(allocator, len);
106 ptr = g_stpcpy(ptr, first);
107 va_start(args, first)__builtin_va_start(args, first);
108 while ((s = va_arg(args, char*)__builtin_va_arg(args, char*))) {
109 ptr = g_stpcpy(ptr, separator);
110 ptr = g_stpcpy(ptr, s);
111 }
112 va_end(args)__builtin_va_end(args);
113
114 return concat;
115
116}
117
118char *
119wmem_strjoinv(wmem_allocator_t *allocator,
120 const char *separator, char **str_array)
121{
122 char *string = NULL((void*)0);
123
124 ws_return_val_if(!str_array, NULL)do { if (1 && (!str_array)) { ws_log_full("InvalidArg"
, LOG_LEVEL_WARNING, "wsutil/str_util.c", 124, __func__, "invalid argument: %s"
, "!str_array"); return (((void*)0)); } } while (0)
;
125
126 if (separator == NULL((void*)0)) {
127 separator = "";
128 }
129
130 if (str_array[0]) {
131 int i;
132 char *ptr;
133 size_t len, separator_len;
134
135 separator_len = strlen(separator);
136
137 /* Get first part of length. Plus one for null byte. */
138 len = 1 + strlen(str_array[0]);
139 /* Get the full length, including the separators. */
140 for (i = 1; str_array[i] != NULL((void*)0); i++) {
141 len += separator_len;
142 len += strlen(str_array[i]);
143 }
144
145 /* Allocate and build the string. */
146 string = (char *)wmem_alloc(allocator, len);
147 ptr = g_stpcpy(string, str_array[0]);
148 for (i = 1; str_array[i] != NULL((void*)0); i++) {
149 ptr = g_stpcpy(ptr, separator);
150 ptr = g_stpcpy(ptr, str_array[i]);
151 }
152 } else {
153 string = wmem_strdup(allocator, "");
154 }
155
156 return string;
157
158}
159
160char **
161wmem_strsplit(wmem_allocator_t *allocator, const char *src,
162 const char *delimiter, int max_tokens)
163{
164 char *splitted;
165 char *s;
166 unsigned tokens;
167 unsigned sep_len;
168 unsigned i;
169 char **vec;
170
171 if (!src || !delimiter || !delimiter[0])
172 return NULL((void*)0);
173
174 /* An empty string results in an empty vector. */
175 if (!src[0]) {
176 vec = wmem_new0(allocator, char *)((char **)wmem_alloc0((allocator), sizeof(char *)));
177 return vec;
178 }
179
180 splitted = wmem_strdup(allocator, src);
181 sep_len = (unsigned)strlen(delimiter);
182
183 if (max_tokens < 1)
184 max_tokens = INT_MAX2147483647;
185
186 /* Calculate the number of fields. */
187 s = splitted;
188 tokens = 1;
189 while (tokens < (unsigned)max_tokens && (s = strstr(s, delimiter)_Generic (0 ? (s) : (void *) 1, const void *: (const char *) (
strstr (s, delimiter)), default: strstr (s, delimiter))
)) {
190 s += sep_len;
191 tokens++;
192 }
193
194 vec = wmem_alloc_array(allocator, char *, tokens + 1)((char **)wmem_alloc((allocator), (((((tokens + 1)) <= 0) ||
((size_t)sizeof(char *) > (9223372036854775807L / (size_t
)((tokens + 1))))) ? 0 : (sizeof(char *) * ((tokens + 1))))))
;
195
196 /* Populate the array of string tokens. */
197 s = splitted;
198 vec[0] = s;
199 tokens = 1;
200 while (tokens < (unsigned)max_tokens && (s = strstr(s, delimiter)_Generic (0 ? (s) : (void *) 1, const void *: (const char *) (
strstr (s, delimiter)), default: strstr (s, delimiter))
)) {
201 for (i = 0; i < sep_len; i++)
202 s[i] = '\0';
203 s += sep_len;
204 vec[tokens] = s;
205 tokens++;
206
207 }
208
209 vec[tokens] = NULL((void*)0);
210
211 return vec;
212}
213
214/*
215 * wmem_ascii_strdown:
216 * based on g_ascii_strdown.
217 */
218char*
219wmem_ascii_strdown(wmem_allocator_t *allocator, const char *str, ssize_t len)
220{
221 char *result, *s;
222 size_t abs_len;
223
224 g_return_val_if_fail (str != NULL, NULL)do { if ((str != ((void*)0))) { } else { g_return_if_fail_warning
(((gchar*) 0), ((const char*) (__func__)), "str != NULL"); return
(((void*)0)); } } while (0)
;
225
226 abs_len = (len < 0) ? strlen(str) : (size_t)len;
227
228 result = wmem_strndup(allocator, str, abs_len);
229 for (s = result; *s; s++)
230 *s = g_ascii_tolower (*s);
231
232 return result;
233}
234
/*
 * Convert one hexadecimal digit character (either case) to its value 0-15.
 * Returns -1 when 'ch' is not a hexadecimal digit.
 */
int
ws_xton(char ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return ch - 'a' + 10;
    }
    if (ch >= 'A' && ch <= 'F') {
        return ch - 'A' + 10;
    }
    return -1;
}
258
/* Convert all ASCII letters to lower case, in place. Returns 'str'. */
char *
ascii_strdown_inplace(char *str)
{
    char *cur = str;

    while (*cur != '\0') {
        const char c = *cur;

        /* Same result as g_ascii_tolower(), written out inline. */
        *cur = g_ascii_isupper (c) ? c - 'A' + 'a' : c;
        cur++;
    }

    return str;
}
271
/* Convert all ASCII letters to upper case, in place. Returns 'str'. */
char *
ascii_strup_inplace(char *str)
{
    char *cur = str;

    while (*cur != '\0') {
        const char c = *cur;

        /* Same result as g_ascii_toupper(), written out inline. */
        *cur = g_ascii_islower (c) ? c - 'a' + 'A' : c;
        cur++;
    }

    return str;
}
284
/* Check if an entire string is printable, as judged by g_ascii_isprint().
 * An empty string counts as printable. */
bool
isprint_string(const char *str)
{
    const char *cur;

    /* Walk to the terminating null, bailing out on the first
     * non-printable character. */
    for (cur = str; *cur != '\0'; cur++) {
        if (!g_ascii_isprint(*cur)) {
            return false;
        }
    }

    /* Every character passed the check. */
    return true;
}
302
/* Check if an entire UTF-8 string is printable.
 * Returns false when the first 'length' bytes of 'str' fail
 * g_utf8_validate(), or when any code point fails g_unichar_isprint().
 */
bool
isprint_utf8_string(const char *str, const unsigned length)
{
    const char * const end = str + length;
    const char *cur;

    if (!g_utf8_validate(str, length, NULL)) {
        return false;
    }

    for (cur = str; cur < end; cur = g_utf8_next_char(cur)) {
        /* g_unichar_isprint() returns false for G_UNICODE_CONTROL |
         * G_UNICODE_FORMAT | G_UNICODE_UNASSIGNED | G_UNICODE_SURROGATE
         * XXX: Could it be ok to have certain format characters, e.g.
         * U+00AD SOFT HYPHEN? If so, format_text() should be changed too.
         */
        if (!g_unichar_isprint(g_utf8_get_char(cur))) {
            return false;
        }
    }

    return true;
}
327
/* Check if an entire string is digits, as judged by g_ascii_isdigit().
 * An empty string counts as all digits. */
bool
isdigit_string(const char *str)
{
    const char *cur;

    /* Walk to the terminating null, bailing out on the first
     * non-digit character. */
    for (cur = str; *cur != '\0'; cur++) {
        if (!g_ascii_isdigit(*cur)) {
            return false;
        }
    }

    /* Every character passed the check. */
    return true;
}
345
/*
 * Find the first occurrence of 'needle' in 'haystack', comparing with
 * g_ascii_strncasecmp(). Returns a pointer into 'haystack', or NULL when
 * there is no match. An empty needle matches at the start of haystack.
 */
const char *
ws_ascii_strcasestr(const char *haystack, const char *needle)
{
    /* Do not use strcasestr() here, even if a system has it, as it is
     * locale-dependent (and has different results for e.g. Turkic languages.)
     * FreeBSD, NetBSD, macOS have a strcasestr_l() that could be used.
     */
    const size_t hay_len = strlen(haystack);
    const size_t needle_len = strlen(needle);
    size_t remaining;

    if (needle_len > hay_len) {
        return NULL;
    }

    /* Try every start position that leaves room for the whole needle. */
    for (remaining = hay_len - needle_len + 1; remaining > 0; remaining--, haystack++) {
        if (g_ascii_strncasecmp(haystack, needle, needle_len) == 0) {
            return haystack;
        }
    }
    return NULL;
}
363
/* Return the last occurrence of ch in the n bytes of haystack.
 * If not found or n is 0, return NULL.
 *
 * 'ch' is converted to uint8_t before comparison. When HAVE_MEMRCHR is
 * defined the call is forwarded to memrchr(); otherwise a byte-by-byte
 * backwards scan is used.
 */
const uint8_t *
ws_memrchr(const void *_haystack, int ch, size_t n)
{
#ifdef HAVE_MEMRCHR
    return memrchr(_haystack, ch, n);
#else
    /* A generic implementation. This could be optimized considerably,
     * e.g. by fetching a word at a time.
     */
    const uint8_t *haystack = _haystack;
    const uint8_t c = (uint8_t)ch;

    /* Count an index down from n to 1 rather than decrementing a pointer
     * until it drops below 'haystack': forming a pointer before the start
     * of the buffer is undefined behaviour. n == 0 never enters the loop.
     */
    for (size_t i = n; i > 0; i--) {
        if (haystack[i - 1] == c) {
            return &haystack[i - 1];
        }
    }

    return NULL;
#endif /* HAVE_MEMRCHR */
}
393
394static const char *thousands_grouping_fmt;
395static const char *thousands_grouping_fmt_flt;
396
397DIAG_OFF(format)clang diagnostic push clang diagnostic ignored "-Wformat"
398static void test_printf_thousands_grouping(void) {
399 /* test whether wmem_strbuf works with "'" flag character */
400 wmem_strbuf_t *buf = wmem_strbuf_new(NULL((void*)0), NULL((void*)0));
401 wmem_strbuf_append_printf(buf, "%'d", 22);
402 if (g_strcmp0(wmem_strbuf_get_str(buf), "22") == 0) {
403 thousands_grouping_fmt = "%'"PRId64"l" "d";
404 thousands_grouping_fmt_flt = "%'.*f";
405 } else {
406 /* Don't use */
407 thousands_grouping_fmt = "%"PRId64"l" "d";
408 thousands_grouping_fmt_flt = "%.*f";
409 }
410 wmem_strbuf_destroy(buf);
411}
412DIAG_ON(format)clang diagnostic pop
413
414static const char* decimal_point = NULL((void*)0);
415
416static void truncate_numeric_strbuf(wmem_strbuf_t *strbuf, int n) {
417
418 const char *s = wmem_strbuf_get_str(strbuf);
419 const char *p;
420 int count;
421
422 if (decimal_point == NULL((void*)0)) {
423 decimal_point = localeconv()->decimal_point;
424 }
425
426 p = (const char *)strchr(s, decimal_point[0])_Generic (0 ? (s) : (void *) 1, const void *: (const char *) (
strchr (s, decimal_point[0])), default: strchr (s, decimal_point
[0]))
;
427 if (p != NULL((void*)0)) {
428 count = n;
429 while (count >= 0) {
430 count--;
431 if (*p == '\0')
432 break;
433 p++;
434 }
435
436 p--;
437 while (*p == '0') {
438 p--;
439 }
440
441 if (*p != decimal_point[0]) {
442 p++;
443 }
444 wmem_strbuf_truncate(strbuf, (size_t)(p - s));
445 }
446}
447
448/* Given a floating point value, return it in a human-readable format,
449 * using units with metric prefixes (falling back to scientific notation
450 * with the base units if outside the range.)
451 */
452char *
453format_units(wmem_allocator_t *allocator, double size,
454 format_size_units_e unit, uint16_t flags,
455 int precision)
456{
457 wmem_strbuf_t *human_str = wmem_strbuf_new(allocator, NULL((void*)0));
458 bool_Bool is_small = false0;
459 /* is_small is when to use the longer, spelled out unit.
460 * We use it for inf, NaN, 0, and unprefixed small values,
461 * but not for unprefixed values using scientific notation
462 * the value is outside the supported prefix range.
463 */
464 bool_Bool scientific = false0;
465 double abs_size = fabs(size);
466 const struct prefix_parameters * const pp = prefix_parameters_for_flags(flags);
467 int prefix_index = pp->prefix_offset;
468 char *ret_val;
469
470 if (thousands_grouping_fmt == NULL((void*)0))
471 test_printf_thousands_grouping();
472
473 if (isfinite(size)__builtin_isfinite (size) && size != 0.0) {
474
475 double comp = precision == 0 ? 10.0 : 1.0;
476
477 /* For precision 0, use the range [10, 10*power) because only
478 * one significant digit is not as useful. This is what format_size
479 * does for integers. ("ls -h" uses one digit after the decimal
480 * point only for the [1, 10) range, g_format_size() always displays
481 * tenths.) Prefer non-prefixed units for the range [1,10), though.
482 *
483 * We have a limited number of units to check, so this (which
484 * can be unrolled) is presumably faster than log + floor + pow/exp
485 */
486 if (abs_size < 1.0) {
487 while (abs_size < comp) {
488 abs_size *= pp->power;
489 if (prefix_index == 0) {
490 scientific = true1;
491 break;
492 }
493 prefix_index--;
494 }
495 } else {
496 while (abs_size >= comp * pp->power) {
497 abs_size /= pp->power;
498 if (prefix_index == pp->prefix_count - 1) {
499 scientific = true1;
500 break;
501 }
502 prefix_index++;
503 }
504 }
505 }
506
507 if (scientific) {
508 wmem_strbuf_append_printf(human_str, "%.*g", precision + 1, size);
509 prefix_index = pp->prefix_offset;
510 } else {
511 if (prefix_index == pp->prefix_offset) {
512 is_small = true1;
513 }
514 size = copysign(abs_size, size);
515 // Truncate trailing zeros, but do it this way because we know
516 // we don't want scientific notation, and we don't want %g to
517 // switch to that if precision is small. (We could always use
518 // %g when precision is large.)
519 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt_flt, precision, size);
520 truncate_numeric_strbuf(human_str, precision);
521 // XXX - when rounding to a certain precision, printf might
522 // round up to "power" from something like 999.99999995, which
523 // looks a little odd on a graph when transitioning from 1,000 bytes
524 // (for values just under 1 kB) to 1 kB (for values 1 kB and larger.)
525 // Due to edge cases in binary fp representation and how printf might
526 // round things, the right way to handle it is taking the printf output
527 // and comparing it to "1000" and "1024" and adjusting the exponent
528 // if so - though we need to compare to the version with the thousands
529 // separator if we have that (which makes it harder to use strnatcmp
530 // as is.)
531 }
532
533 wmem_strbuf_append(human_str, pp->prefix[prefix_index]);
534
535 switch (unit) {
536 case FORMAT_SIZE_UNIT_NONE:
537 break;
538 case FORMAT_SIZE_UNIT_BYTES:
539 wmem_strbuf_append(human_str, is_small ? "bytes" : "B");
540 break;
541 case FORMAT_SIZE_UNIT_BITS:
542 wmem_strbuf_append(human_str, is_small ? "bits" : "b");
543 break;
544 case FORMAT_SIZE_UNIT_BITS_S:
545 wmem_strbuf_append(human_str, is_small ? "bits/s" : "bps");
546 break;
547 case FORMAT_SIZE_UNIT_BYTES_S:
548 wmem_strbuf_append(human_str, is_small ? "bytes/s" : "Bps");
549 break;
550 case FORMAT_SIZE_UNIT_PACKETS:
551 wmem_strbuf_append(human_str, is_small ? "packets" : "pkts");
552 break;
553 case FORMAT_SIZE_UNIT_PACKETS_S:
554 wmem_strbuf_append(human_str, is_small ? "packets/s" : "pkts/s");
555 break;
556 case FORMAT_SIZE_UNIT_EVENTS:
557 wmem_strbuf_append(human_str, is_small ? "events" : "evts");
558 break;
559 case FORMAT_SIZE_UNIT_EVENTS_S:
560 wmem_strbuf_append(human_str, is_small ? "events/s" : "evts/s");
561 break;
562 case FORMAT_SIZE_UNIT_FIELDS:
563 wmem_strbuf_append(human_str, is_small ? "fields" : "flds");
564 break;
565 case FORMAT_SIZE_UNIT_SECONDS:
566 wmem_strbuf_append(human_str, is_small ? "seconds" : "s");
567 break;
568 case FORMAT_SIZE_UNIT_ERLANGS:
569 wmem_strbuf_append(human_str, is_small ? "erlangs" : "E");
570 break;
571 default:
572 ws_assert_not_reached()ws_log_fatal_full("", LOG_LEVEL_ERROR, "wsutil/str_util.c", 572
, __func__, "assertion \"not reached\" failed")
;
573 }
574
575 ret_val = wmem_strbuf_finalize(human_str);
576 /* Convention is a space between the value and the units. If we have
577 * a prefix, the space is before the prefix. There are two possible
578 * uses of FORMAT_SIZE_UNIT_NONE:
579 * 1. Add a unit immediately after the string returned. In this case,
580 * we would want the string to end with a space if there's no prefix.
581 * 2. The unit appears somewhere else, e.g. in a legend, header, or
582 * different column. In this case, we don't want the string to end
583 * with a space if there's no prefix.
584 * chomping the string here, as we've traditionally done, optimizes for
585 * the latter case but makes the former case harder.
586 * Perhaps the right approach is to distinguish the cases with a new
587 * enum value.
588 */
589 return g_strchomp(ret_val);
590}
591
592/* Given a size, return its value in a human-readable format */
593/* This doesn't handle fractional values. We might want to just
594 * call the version with the double and precision 0 (possibly
595 * slower due to the use of floating point math, but do we care?)
596 */
597char *
598format_size_wmem(wmem_allocator_t *allocator, int64_t size,
599 format_size_units_e unit, uint16_t flags)
600{
601 wmem_strbuf_t *human_str = wmem_strbuf_new(allocator, NULL((void*)0));
602 bool_Bool is_small = false0;
603 const struct prefix_parameters * const pp = prefix_parameters_for_flags(flags);
604 char *ret_val;
605
606 if (thousands_grouping_fmt == NULL((void*)0))
607 test_printf_thousands_grouping();
608
609 int prefix_index = pp->prefix_offset;
610 int64_t scale = 1;
611 while (prefix_index + 1 < pp->prefix_count && scale < INT64_MAX(9223372036854775807L) / (10 * pp->power) && size >= scale * pp->power * 10) {
612 prefix_index++;
613 scale *= pp->power;
614 }
615
616 wmem_strbuf_append_printf(human_str, thousands_grouping_fmt, size / scale);
617 wmem_strbuf_append(human_str, pp->prefix[prefix_index]);
618 is_small = prefix_index == pp->prefix_offset;
619
620 switch (unit) {
621 case FORMAT_SIZE_UNIT_NONE:
622 break;
623 case FORMAT_SIZE_UNIT_BYTES:
624 wmem_strbuf_append(human_str, is_small ? "bytes" : "B");
625 break;
626 case FORMAT_SIZE_UNIT_BITS:
627 wmem_strbuf_append(human_str, is_small ? "bits" : "b");
628 break;
629 case FORMAT_SIZE_UNIT_BITS_S:
630 wmem_strbuf_append(human_str, is_small ? "bits/s" : "bps");
631 break;
632 case FORMAT_SIZE_UNIT_BYTES_S:
633 wmem_strbuf_append(human_str, is_small ? "bytes/s" : "Bps");
634 break;
635 case FORMAT_SIZE_UNIT_PACKETS:
636 wmem_strbuf_append(human_str, is_small ? "packets" : "pkts");
637 break;
638 case FORMAT_SIZE_UNIT_PACKETS_S:
639 wmem_strbuf_append(human_str, is_small ? "packets/s" : "pkts/s");
640 break;
641 case FORMAT_SIZE_UNIT_EVENTS:
642 wmem_strbuf_append(human_str, is_small ? "events" : "evts");
643 break;
644 case FORMAT_SIZE_UNIT_EVENTS_S:
645 wmem_strbuf_append(human_str, is_small ? "events/s" : "evts/s");
646 break;
647 case FORMAT_SIZE_UNIT_FIELDS:
648 wmem_strbuf_append(human_str, is_small ? "fields" : "flds");
649 break;
650 case FORMAT_SIZE_UNIT_SECONDS:
651 wmem_strbuf_append(human_str, is_small ? "seconds" : "s");
652 break;
653 case FORMAT_SIZE_UNIT_ERLANGS:
654 wmem_strbuf_append(human_str, is_small ? "erlangs" : "E");
655 break;
656 default:
657 ws_assert_not_reached()ws_log_fatal_full("", LOG_LEVEL_ERROR, "wsutil/str_util.c", 657
, __func__, "assertion \"not reached\" failed")
;
658 }
659
660 ret_val = wmem_strbuf_finalize(human_str);
661 return g_strchomp(ret_val);
662}
663
/* Return 'c' when g_ascii_isprint() accepts it, otherwise '.'. */
char
printable_char_or_period(char c)
{
    if (g_ascii_isprint(c)) {
        return c;
    }
    return '.';
}
669
/*
 * This is used by the display filter engine and must be compatible
 * with display filter syntax.
 *
 * If 'c' needs a backslash escape, store the character that follows the
 * backslash in '*p' and return true; otherwise return false and leave
 * '*p' untouched.
 */
static inline bool
escape_char(char c, char *p)
{
    char letter;

    ws_assert(p);

    /*
     * backslashes and double-quotes must be escaped (double-quotes
     * are escaped by passing '"' as quote_char in escape_string_len)
     * whitespace is also escaped.
     */
    switch (c) {
        case '\a': letter = 'a';  break;
        case '\b': letter = 'b';  break;
        case '\f': letter = 'f';  break;
        case '\n': letter = 'n';  break;
        case '\r': letter = 'r';  break;
        case '\t': letter = 't';  break;
        case '\v': letter = 'v';  break;
        case '\\': letter = '\\'; break;
        case '\0': letter = '0';  break;
        default:
            return false;
    }

    *p = letter;
    return true;
}
703
/*
 * Escape callback that only handles the null byte: for '\0' it stores '0'
 * in '*p' and returns true; for any other character it returns false and
 * leaves '*p' untouched.
 */
static inline bool
escape_null(char c, char *p)
{
    ws_assert(p);

    if (c != '\0') {
        return false;
    }
    *p = '0';
    return true;
}
714
715static char *
716escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
717 bool_Bool (*escape_func)(char c, char *p), bool_Bool add_quotes,
718 char quote_char, bool_Bool double_quote)
719{
720 char c, r;
721 wmem_strbuf_t *buf;
722 size_t abs_len, alloc_size, i;
723
724 abs_len = (len < 0) ? strlen(string) : (size_t)len;
725
726 alloc_size = abs_len;
727 if (add_quotes)
728 alloc_size += 2;
729
730 buf = wmem_strbuf_new_sized(alloc, alloc_size);
731
732 if (add_quotes && quote_char != '\0')
733 wmem_strbuf_append_c(buf, quote_char);
734
735 for (i = 0; i < abs_len; i++) {
736 c = string[i];
737 if ((escape_func(c, &r))) {
738 wmem_strbuf_append_c(buf, '\\');
739 wmem_strbuf_append_c(buf, r);
740 }
741 else if (c == quote_char && quote_char != '\0') {
742 /* If quoting, we must escape the quote_char somehow. */
743 if (double_quote) {
744 wmem_strbuf_append_c(buf, c);
745 wmem_strbuf_append_c(buf, c);
746 } else {
747 wmem_strbuf_append_c(buf, '\\');
748 wmem_strbuf_append_c(buf, c);
749 }
750 }
751 else if (c == '\\' && quote_char != '\0' && !double_quote) {
752 /* If quoting, and escaping the quote_char with a backslash,
753 * then backslash must be escaped, even if escape_func doesn't. */
754 wmem_strbuf_append_c(buf, '\\');
755 wmem_strbuf_append_c(buf, '\\');
756 }
757 else {
758 /* Other UTF-8 bytes are passed through. */
759 wmem_strbuf_append_c(buf, c);
760 }
761 }
762
763 if (add_quotes && quote_char != '\0')
764 wmem_strbuf_append_c(buf, quote_char);
765
766 return wmem_strbuf_finalize(buf);
767}
768
769char *
770ws_escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len, bool_Bool add_quotes)
771{
772 return escape_string_len(alloc, string, len, escape_char, add_quotes, '"', false0);
773}
774
775char *
776ws_escape_string(wmem_allocator_t *alloc, const char *string, bool_Bool add_quotes)
777{
778 return escape_string_len(alloc, string, -1, escape_char, add_quotes, '"', false0);
779}
780
781char *ws_escape_null(wmem_allocator_t *alloc, const char *string, size_t len, bool_Bool add_quotes)
782{
783 /* XXX: The existing behavior (maintained) here is not to escape
784 * backslashes even though NUL is escaped.
785 */
786 return escape_string_len(alloc, string, len, escape_null, add_quotes, add_quotes ? '"' : '\0', false0);
787}
788
789char *ws_escape_csv(wmem_allocator_t *alloc, const char *string, bool_Bool add_quotes, char quote_char, bool_Bool double_quote, bool_Bool escape_whitespace)
790{
791 if (escape_whitespace)
792 return escape_string_len(alloc, string, -1, escape_char, add_quotes, quote_char, double_quote);
793 else
794 return escape_string_len(alloc, string, -1, escape_null, add_quotes, quote_char, double_quote);
795}
796
/*
 * Write a name for 'errnum' into 'buf' (of size 'buf_size') and return
 * 'buf'. When HAVE_STRERRORNAME_NP is defined and strerrorname_np() returns
 * a name, that name is copied with g_strlcpy(); in every other case the
 * text "Errno(<errnum>)" is written with snprintf().
 */
const char *
ws_strerrorname_r(int errnum, char *buf, size_t buf_size)
{
#ifdef HAVE_STRERRORNAME_NP
    const char *name = strerrorname_np(errnum);

    if (name != NULL) {
        (void)g_strlcpy(buf, name, buf_size);
        return buf;
    }
#endif
    /* Fallback: a generic, numeric description. */
    snprintf(buf, buf_size, "Errno(%d)", errnum);
    return buf;
}
810
811char *
812ws_strdup_underline(wmem_allocator_t *allocator, long offset, size_t len)
813{
814 if (offset < 0)
815 return NULL((void*)0);
816
817 wmem_strbuf_t *buf = wmem_strbuf_new_sized(allocator, offset + len);
818
819 for (int i = 0; i < offset; i++) {
820 wmem_strbuf_append_c(buf, ' ');
821 }
822 wmem_strbuf_append_c(buf, '^');
823
824 for (size_t l = len; l > 1; l--) {
825 wmem_strbuf_append_c(buf, '~');
826 }
827
828 return wmem_strbuf_finalize(buf);
829}
830
/* Number of bytes initially allocated for a format buffer. */
#define INITIAL_FMTBUF_SIZE 128

/*
 * Declare, and initialize, the variables used for an output buffer:
 * the buffer itself (allocated from the enclosing function's 'allocator'),
 * its current size, and the offset at which the next byte is written.
 */
#define FMTBUF_VARS \
    char *fmtbuf = (char*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE); \
    unsigned fmtbuf_len = INITIAL_FMTBUF_SIZE; \
    unsigned column = 0
840
/*
 * Expand the buffer to be large enough to add nbytes bytes, plus a
 * terminating '\0'.
 *
 * Operates on the fmtbuf, fmtbuf_len and column variables declared by
 * FMTBUF_VARS and on the enclosing function's 'allocator'. If computing the
 * new size overflows, the buffer is terminated at 'column' and the
 * ENCLOSING FUNCTION RETURNS fmtbuf.
 *
 * The nbytes argument is parenthesized at every use so that an expression
 * argument keeps its intended grouping.
 */
#define FMTBUF_EXPAND(nbytes) \
    /* \
     * Is there enough room for those bytes and also enough room for \
     * a terminating '\0'? \
     */ \
    if (column+((nbytes)+1) >= fmtbuf_len) { \
        /* \
         * Double the buffer's size if it's not big enough. \
         * The size of the buffer starts at 128, so doubling its size \
         * adds at least another 128 bytes, which is more than enough \
         * for one more character plus a terminating '\0'. \
         */ \
        if (ckd_mul(&fmtbuf_len, fmtbuf_len, 2)) { \
            ws_debug("overflow!"); \
            FMTBUF_ENDSTR; \
            return fmtbuf; \
        } \
        /* \
         * Still too small after doubling: grow to exactly what is \
         * needed instead. \
         */ \
        if (column+((nbytes)+1) >= fmtbuf_len) { \
            if (ckd_add(&fmtbuf_len, fmtbuf_len, (column + (nbytes) + 2) - fmtbuf_len)) { \
                ws_debug("overflow!"); \
                FMTBUF_ENDSTR; \
                return fmtbuf; \
            } \
        } \
        fmtbuf = (char *)wmem_realloc(allocator, fmtbuf, fmtbuf_len); \
    }
871
/*
 * Put a byte into the buffer; space must have been ensured for it.
 * Stores 'b' at fmtbuf[column] and then advances column by one.
 * Expands to two statements with no trailing semicolon.
 */
#define FMTBUF_PUTCHAR(b) \
    fmtbuf[column] = (b); \
    column++
878
/*
 * Add the one-byte argument, as an octal escape sequence, to the end
 * of the buffer.
 * Emits three octal digit characters via FMTBUF_PUTCHAR: bits 7-6,
 * bits 5-3, then bits 2-0 of 'b'. No leading backslash is written here.
 */
#define FMTBUF_PUTBYTE_OCTAL(b) \
    FMTBUF_PUTCHAR((((b)>>6)&03) + '0'); \
    FMTBUF_PUTCHAR((((b)>>3)&07) + '0'); \
    FMTBUF_PUTCHAR((((b)>>0)&07) + '0')
887
/*
 * Add the one-byte argument, as a hex escape sequence ("\x" followed by
 * two hex digits taken from the `hex` digit table), to the end of the
 * buffer.  Space for the four characters must already have been
 * ensured.
 *
 * Wrapped in do { } while (0) so the four stores behave as a single
 * statement under an unbraced if/else/loop.
 */
#define FMTBUF_PUTBYTE_HEX(b) \
    do { \
        FMTBUF_PUTCHAR('\\'); \
        FMTBUF_PUTCHAR('x'); \
        FMTBUF_PUTCHAR(hex[((b) >> 4) & 0xF]); \
        FMTBUF_PUTCHAR(hex[((b) >> 0) & 0xF]); \
    } while (0)
897
/*
 * Append `len` bytes starting at `bytes` to the end of the buffer,
 * growing the buffer first if that is needed to hold them plus a
 * terminating '\0'.
 *
 * FMTBUF_EXPAND returns from the enclosing function (with the string
 * built so far) if the new buffer size would overflow, so this macro
 * may not fall through to the statement after it.
 *
 * Wrapped in do { } while (0), with the arguments parenthesized, so it
 * behaves as one statement and is safe with expression arguments.
 */
#define FMTBUF_PUTBYTES(bytes, len) \
    do { \
        FMTBUF_EXPAND(len); \
        memcpy(&fmtbuf[column], (bytes), (len)); \
        column += (unsigned)(len); /* FMTBUF_EXPAND checks for overflow */ \
    } while (0)
902
/*
 * Terminate the string being built: store a '\0' at the current write
 * position of the buffer.  `column` is left unchanged.
 */
#define FMTBUF_ENDSTR \
    (fmtbuf[column] = '\0')
908
909static char *
910format_text_internal(wmem_allocator_t *allocator,
911 const unsigned char *string, size_t len,
912 bool_Bool replace_space)
913{
914 FMTBUF_VARSchar *fmtbuf = (char*)wmem_alloc(allocator, 128); unsigned fmtbuf_len
= 128; unsigned column = 0
;
915 const unsigned char *prev = string;
916 const unsigned char *stringend = string + len;
917 unsigned char c;
918 size_t printable_bytes = 0;
919
920 while (string < stringend) {
921 /*
922 * Get the first byte of this character.
923 */
924 c = *string++;
925 if ((0x20 <= c) && (c < 0x7F)) {
926 /*
927 * Printable ASCII, so not part of a multi-byte UTF-8 sequence.
928 * Make sure there's enough room for one more byte, and add
929 * the character.
930 */
931 printable_bytes++;
932 } else {
933 if (printable_bytes) {
934 FMTBUF_PUTBYTES(prev, printable_bytes)if (column+(printable_bytes+1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 934, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(printable_bytes+1) >= fmtbuf_len) { if (__builtin_add_overflow
((fmtbuf_len), ((column + printable_bytes + 2) - fmtbuf_len),
(&fmtbuf_len))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG
, "wsutil/str_util.c", 934, __func__, "overflow!"); } } while
(0); fmtbuf[column] = '\0'; return fmtbuf; } } fmtbuf = (char
*)wmem_realloc(allocator, fmtbuf, fmtbuf_len); } memcpy(&
fmtbuf[column], prev, printable_bytes); column += (unsigned)printable_bytes
;
;
935 printable_bytes = 0;
936 }
937 if (replace_space && g_ascii_isspace(c)((g_ascii_table[(guchar) (c)] & G_ASCII_SPACE) != 0)) {
938 /*
939 * ASCII, so not part of a multi-byte UTF-8 sequence, but
940 * not printable, but is a space character; show it as a
941 * blank.
942 *
943 * Make sure there's enough room for one more byte, and add
944 * the blank.
945 */
946 FMTBUF_EXPAND(1)if (column+(1 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 946, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(1 +1) >= fmtbuf_len) { if (__builtin_add_overflow(
(fmtbuf_len), ((column + 1 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 946, __func__, "overflow!"); } } while (0); fmtbuf[column] =
'\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
947 FMTBUF_PUTCHAR(' ')fmtbuf[column] = (' '); column++;
948 } else if (c < 128) {
949 /*
950 * ASCII, so not part of a multi-byte UTF-8 sequence, but not
951 * printable.
952 *
953 * That requires a minimum of 2 bytes, one for the backslash
954 * and one for a letter, so make sure we have enough room
955 * for that, plus a trailing '\0'.
956 */
957 FMTBUF_EXPAND(2)if (column+(2 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 957, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(2 +1) >= fmtbuf_len) { if (__builtin_add_overflow(
(fmtbuf_len), ((column + 2 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 957, __func__, "overflow!"); } } while (0); fmtbuf[column] =
'\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
958 FMTBUF_PUTCHAR('\\')fmtbuf[column] = ('\\'); column++;
959 switch (c) {
960
961 case '\a':
962 FMTBUF_PUTCHAR('a')fmtbuf[column] = ('a'); column++;
963 break;
964
965 case '\b':
966 FMTBUF_PUTCHAR('b')fmtbuf[column] = ('b'); column++; /* BS */
967 break;
968
969 case '\f':
970 FMTBUF_PUTCHAR('f')fmtbuf[column] = ('f'); column++; /* FF */
971 break;
972
973 case '\n':
974 FMTBUF_PUTCHAR('n')fmtbuf[column] = ('n'); column++; /* NL */
975 break;
976
977 case '\r':
978 FMTBUF_PUTCHAR('r')fmtbuf[column] = ('r'); column++; /* CR */
979 break;
980
981 case '\t':
982 FMTBUF_PUTCHAR('t')fmtbuf[column] = ('t'); column++; /* tab */
983 break;
984
985 case '\v':
986 FMTBUF_PUTCHAR('v')fmtbuf[column] = ('v'); column++;
987 break;
988
989 default:
990 /*
991 * We've already put the backslash, but this
992 * will put 3 more characters for the octal
993 * number; make sure we have enough room for
994 * that, plus the trailing '\0'.
995 */
996 FMTBUF_EXPAND(3)if (column+(3 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 996, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(3 +1) >= fmtbuf_len) { if (__builtin_add_overflow(
(fmtbuf_len), ((column + 3 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 996, __func__, "overflow!"); } } while (0); fmtbuf[column] =
'\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
997 FMTBUF_PUTBYTE_OCTAL(c)fmtbuf[column] = ((((c)>>6)&03) + '0'); column++; fmtbuf
[column] = ((((c)>>3)&07) + '0'); column++; fmtbuf[
column] = ((((c)>>0)&07) + '0'); column++
;
998 break;
999 }
1000 } else {
1001 /*
1002 * We've fetched the first byte of a multi-byte UTF-8
1003 * sequence into c.
1004 */
1005 int utf8_len;
1006 unsigned char mask;
1007 gunichar uc;
1008 unsigned char first;
1009
1010 if ((c & 0xe0) == 0xc0) {
1011 /* Starts a 2-byte UTF-8 sequence; 1 byte left */
1012 utf8_len = 1;
1013 mask = 0x1f;
1014 } else if ((c & 0xf0) == 0xe0) {
1015 /* Starts a 3-byte UTF-8 sequence; 2 bytes left */
1016 utf8_len = 2;
1017 mask = 0x0f;
1018 } else if ((c & 0xf8) == 0xf0) {
1019 /* Starts a 4-byte UTF-8 sequence; 3 bytes left */
1020 utf8_len = 3;
1021 mask = 0x07;
1022 } else if ((c & 0xfc) == 0xf8) {
1023 /* Starts an old-style 5-byte UTF-8 sequence; 4 bytes left */
1024 utf8_len = 4;
1025 mask = 0x03;
1026 } else if ((c & 0xfe) == 0xfc) {
1027 /* Starts an old-style 6-byte UTF-8 sequence; 5 bytes left */
1028 utf8_len = 5;
1029 mask = 0x01;
1030 } else {
1031 /* 0xfe or 0xff or a continuation byte - not valid */
1032 utf8_len = -1;
1033 }
1034 if (utf8_len > 0) {
1035 /* Try to construct the Unicode character */
1036 uc = c & mask;
1037 for (int i = 0; i < utf8_len; i++) {
1038 if (string >= stringend) {
1039 /*
1040 * Ran out of octets, so the character is
1041 * incomplete. Put in a REPLACEMENT CHARACTER
1042 * instead, and then continue the loop, which
1043 * will terminate.
1044 */
1045 uc = UNICODE_REPLACEMENT_CHARACTER0x00FFFD;
1046 break;
1047 }
1048 c = *string;
1049 if ((c & 0xc0) != 0x80) {
1050 /*
1051 * Not valid UTF-8 continuation character; put in
1052 * a replacement character, and then re-process
1053 * this octet as the beginning of a new character.
1054 */
1055 uc = UNICODE_REPLACEMENT_CHARACTER0x00FFFD;
1056 break;
1057 }
1058 string++;
1059 uc = (uc << 6) | (c & 0x3f);
1060 }
1061
1062 /*
1063 * If this isn't a valid Unicode character, put in
1064 * a REPLACEMENT CHARACTER.
1065 */
1066 if (!g_unichar_validate(uc))
1067 uc = UNICODE_REPLACEMENT_CHARACTER0x00FFFD;
1068 } else {
1069 /* 0xfe or 0xff; put it a REPLACEMENT CHARACTER */
1070 uc = UNICODE_REPLACEMENT_CHARACTER0x00FFFD;
1071 }
1072
1073 /*
1074 * OK, is it a printable Unicode character?
1075 */
1076 if (g_unichar_isprint(uc)) {
1077 /*
1078 * Yes - put it into the string as UTF-8.
1079 * This means that if it was an overlong
1080 * encoding, this will put out the right
1081 * sized encoding.
1082 */
1083 if (uc < 0x80) {
1084 first = 0;
1085 utf8_len = 1;
1086 } else if (uc < 0x800) {
1087 first = 0xc0;
1088 utf8_len = 2;
1089 } else if (uc < 0x10000) {
1090 first = 0xe0;
1091 utf8_len = 3;
1092 } else if (uc < 0x200000) {
1093 first = 0xf0;
1094 utf8_len = 4;
1095 } else if (uc < 0x4000000) {
1096 /*
1097 * This should never happen, as Unicode doesn't
1098 * go that high.
1099 */
1100 first = 0xf8;
1101 utf8_len = 5;
1102 } else {
1103 /*
1104 * This should never happen, as Unicode doesn't
1105 * go that high.
1106 */
1107 first = 0xfc;
1108 utf8_len = 6;
1109 }
1110 FMTBUF_EXPAND(utf8_len)if (column+(utf8_len+1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 1110, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(utf8_len+1) >= fmtbuf_len) { if (__builtin_add_overflow
((fmtbuf_len), ((column + utf8_len + 2) - fmtbuf_len), (&
fmtbuf_len))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG
, "wsutil/str_util.c", 1110, __func__, "overflow!"); } } while
(0); fmtbuf[column] = '\0'; return fmtbuf; } } fmtbuf = (char
*)wmem_realloc(allocator, fmtbuf, fmtbuf_len); }
;
1111 for (int i = utf8_len - 1; i > 0; i--) {
1112 fmtbuf[column + i] = (uc & 0x3f) | 0x80;
1113 uc >>= 6;
1114 }
1115 fmtbuf[column] = uc | first;
1116 column += utf8_len;
1117 } else if (replace_space && g_unichar_isspace(uc)) {
1118 /*
1119 * Not printable, but is a space character; show it
1120 * as a blank.
1121 *
1122 * Make sure there's enough room for one more byte,
1123 * and add the blank.
1124 */
1125 FMTBUF_EXPAND(1)if (column+(1 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 1125, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(1 +1) >= fmtbuf_len) { if (__builtin_add_overflow(
(fmtbuf_len), ((column + 1 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 1125, __func__, "overflow!"); } } while (0); fmtbuf[column]
= '\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
1126 FMTBUF_PUTCHAR(' ')fmtbuf[column] = (' '); column++;
1127 } else if (c < 128) {
1128 /*
1129 * ASCII, but not printable.
1130 * Yes, this could happen with an overlong encoding.
1131 *
1132 * That requires a minimum of 2 bytes, one for the
1133 * backslash and one for a letter, so make sure we
1134 * have enough room for that, plus a trailing '\0'.
1135 */
1136 FMTBUF_EXPAND(2)if (column+(2 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 1136, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(2 +1) >= fmtbuf_len) { if (__builtin_add_overflow(
(fmtbuf_len), ((column + 2 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 1136, __func__, "overflow!"); } } while (0); fmtbuf[column]
= '\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
1137 FMTBUF_PUTCHAR('\\')fmtbuf[column] = ('\\'); column++;
1138 switch (c) {
1139
1140 case '\a':
1141 FMTBUF_PUTCHAR('a')fmtbuf[column] = ('a'); column++;
1142 break;
1143
1144 case '\b':
1145 FMTBUF_PUTCHAR('b')fmtbuf[column] = ('b'); column++; /* BS */
1146 break;
1147
1148 case '\f':
1149 FMTBUF_PUTCHAR('f')fmtbuf[column] = ('f'); column++; /* FF */
1150 break;
1151
1152 case '\n':
1153 FMTBUF_PUTCHAR('n')fmtbuf[column] = ('n'); column++; /* NL */
1154 break;
1155
1156 case '\r':
1157 FMTBUF_PUTCHAR('r')fmtbuf[column] = ('r'); column++; /* CR */
1158 break;
1159
1160 case '\t':
1161 FMTBUF_PUTCHAR('t')fmtbuf[column] = ('t'); column++; /* tab */
1162 break;
1163
1164 case '\v':
1165 FMTBUF_PUTCHAR('v')fmtbuf[column] = ('v'); column++;
1166 break;
1167
1168 default:
1169 /*
1170 * We've already put the backslash, but this
1171 * will put 3 more characters for the octal
1172 * number; make sure we have enough room for
1173 * that, plus the trailing '\0'.
1174 */
1175 FMTBUF_EXPAND(3)if (column+(3 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 1175, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(3 +1) >= fmtbuf_len) { if (__builtin_add_overflow(
(fmtbuf_len), ((column + 3 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 1175, __func__, "overflow!"); } } while (0); fmtbuf[column]
= '\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
1176 FMTBUF_PUTBYTE_OCTAL(c)fmtbuf[column] = ((((c)>>6)&03) + '0'); column++; fmtbuf
[column] = ((((c)>>3)&07) + '0'); column++; fmtbuf[
column] = ((((c)>>0)&07) + '0'); column++
;
1177 break;
1178 }
1179 } else {
1180 /*
1181 * Unicode, but not printable, and not ASCII;
1182 * put it out as \uxxxx or \Uxxxxxxxx.
1183 */
1184 if (uc <= 0xFFFF) {
1185 FMTBUF_EXPAND(6)if (column+(6 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 1185, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(6 +1) >= fmtbuf_len) { if (__builtin_add_overflow(
(fmtbuf_len), ((column + 6 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 1185, __func__, "overflow!"); } } while (0); fmtbuf[column]
= '\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
1186 FMTBUF_PUTCHAR('\\')fmtbuf[column] = ('\\'); column++;
1187 FMTBUF_PUTCHAR('u')fmtbuf[column] = ('u'); column++;
1188 FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF])fmtbuf[column] = (hex[(uc >> 12) & 0xF]); column++;
1189 FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF])fmtbuf[column] = (hex[(uc >> 8) & 0xF]); column++;
1190 FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF])fmtbuf[column] = (hex[(uc >> 4) & 0xF]); column++;
1191 FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF])fmtbuf[column] = (hex[(uc >> 0) & 0xF]); column++;
1192 } else {
1193 FMTBUF_EXPAND(10)if (column+(10 +1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 1193, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(10 +1) >= fmtbuf_len) { if (__builtin_add_overflow
((fmtbuf_len), ((column + 10 + 2) - fmtbuf_len), (&fmtbuf_len
))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG, "wsutil/str_util.c"
, 1193, __func__, "overflow!"); } } while (0); fmtbuf[column]
= '\0'; return fmtbuf; } } fmtbuf = (char *)wmem_realloc(allocator
, fmtbuf, fmtbuf_len); }
;
1194 FMTBUF_PUTCHAR('\\')fmtbuf[column] = ('\\'); column++;
1195 FMTBUF_PUTCHAR('U')fmtbuf[column] = ('U'); column++;
1196 FMTBUF_PUTCHAR(hex[(uc >> 28) & 0xF])fmtbuf[column] = (hex[(uc >> 28) & 0xF]); column++;
1197 FMTBUF_PUTCHAR(hex[(uc >> 24) & 0xF])fmtbuf[column] = (hex[(uc >> 24) & 0xF]); column++;
1198 FMTBUF_PUTCHAR(hex[(uc >> 20) & 0xF])fmtbuf[column] = (hex[(uc >> 20) & 0xF]); column++;
1199 FMTBUF_PUTCHAR(hex[(uc >> 16) & 0xF])fmtbuf[column] = (hex[(uc >> 16) & 0xF]); column++;
1200 FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF])fmtbuf[column] = (hex[(uc >> 12) & 0xF]); column++;
1201 FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF])fmtbuf[column] = (hex[(uc >> 8) & 0xF]); column++;
1202 FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF])fmtbuf[column] = (hex[(uc >> 4) & 0xF]); column++;
1203 FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF])fmtbuf[column] = (hex[(uc >> 0) & 0xF]); column++;
1204 }
1205 }
1206 }
1207 prev = string;
1208 }
1209 }
1210 if (printable_bytes) {
1211 FMTBUF_PUTBYTES(prev, printable_bytes)if (column+(printable_bytes+1) >= fmtbuf_len) { if (__builtin_mul_overflow
((fmtbuf_len), (2), (&fmtbuf_len))) { do { if (1) { ws_log_full
("", LOG_LEVEL_DEBUG, "wsutil/str_util.c", 1211, __func__, "overflow!"
); } } while (0); fmtbuf[column] = '\0'; return fmtbuf; } if (
column+(printable_bytes+1) >= fmtbuf_len) { if (__builtin_add_overflow
((fmtbuf_len), ((column + printable_bytes + 2) - fmtbuf_len),
(&fmtbuf_len))) { do { if (1) { ws_log_full("", LOG_LEVEL_DEBUG
, "wsutil/str_util.c", 1211, __func__, "overflow!"); } } while
(0); fmtbuf[column] = '\0'; return fmtbuf; } } fmtbuf = (char
*)wmem_realloc(allocator, fmtbuf, fmtbuf_len); } memcpy(&
fmtbuf[column], prev, printable_bytes); column += (unsigned)printable_bytes
;
;
1212 printable_bytes = 0;
Value stored to 'printable_bytes' is never read
1213 }
1214
1215 FMTBUF_ENDSTRfmtbuf[column] = '\0';
1216
1217 return fmtbuf;
1218}
1219
1220/*
1221 * Given a wmem scope, a not-necessarily-null-terminated string,
1222 * expected to be in UTF-8 but possibly containing invalid sequences
1223 * (as it may have come from packet data), and the length of the string,
1224 * generate a valid UTF-8 string from it, allocated in the specified
1225 * wmem scope, that:
1226 *
1227 * shows printable Unicode characters as themselves;
1228 *
1229 * shows non-printable ASCII characters as C-style escapes (octal
1230 * if not one of the standard ones such as LF -> '\n');
1231 *
1232 * shows non-printable Unicode-but-not-ASCII characters as
1233 * their universal character names;
1234 *
1235 * shows illegal UTF-8 sequences as a sequence of bytes represented
1236 * as C-style hex escapes (XXX: Does not actually do this. Some illegal
1237 * sequences, such as overlong encodings, the sequences reserved for
1238 * UTF-16 surrogate halves (paired or unpaired), and values outside
1239 * Unicode (i.e., the old sequences for code points above U+10FFFF)
1240 * will be decoded in a permissive way. Other illegal sequences,
1241 * such 0xFE and 0xFF and the presence of a continuation byte where
1242 * not expected (or vice versa its absence), are replaced with
1243 * REPLACEMENT CHARACTER.)
1244 *
1245 * and return a pointer to it.
1246 */
1247char *
1248format_text(wmem_allocator_t *allocator,
1249 const char *string, size_t len)
1250{
1251 return format_text_internal(allocator, (const uint8_t*)string, len, false0);
1252}
1253
1254/** Given a wmem scope and a null-terminated string, expected to be in
1255 * UTF-8 but possibly containing invalid sequences (as it may have come
1256 * from packet data), and the length of the string, generate a valid
1257 * UTF-8 string from it, allocated in the specified wmem scope, that:
1258 *
1259 * shows printable Unicode characters as themselves;
1260 *
1261 * shows non-printable ASCII characters as C-style escapes (octal
1262 * if not one of the standard ones such as LF -> '\n');
1263 *
1264 * shows non-printable Unicode-but-not-ASCII characters as
1265 * their universal character names;
1266 *
1267 * shows illegal UTF-8 sequences as a sequence of bytes represented
1268 * as C-style hex escapes;
1269 *
1270 * and return a pointer to it.
1271 */
1272char *
1273format_text_string(wmem_allocator_t* allocator, const char *string)
1274{
1275 return format_text_internal(allocator, (const uint8_t*)string, strlen(string), false0);
1276}
1277
1278/*
1279 * Given a string, generate a string from it that shows non-printable
1280 * characters as C-style escapes except a whitespace character
1281 * (space, tab, carriage return, new line, vertical tab, or formfeed)
1282 * which will be replaced by a space, and return a pointer to it.
1283 */
1284char *
1285format_text_wsp(wmem_allocator_t* allocator, const char *string, size_t len)
1286{
1287 return format_text_internal(allocator, (const uint8_t*)string, len, true1);
1288}
1289
1290/*
1291 * Given a string, generate a string from it that shows non-printable
1292 * characters as the chr parameter passed, except a whitespace character
1293 * (space, tab, carriage return, new line, vertical tab, or formfeed)
1294 * which will be replaced by a space, and return a pointer to it.
1295 *
1296 * This does *not* treat the input string as UTF-8.
1297 *
1298 * This is useful for displaying binary data that frequently but not always
1299 * contains text; otherwise the number of C escape codes makes it unreadable.
1300 */
1301char *
1302format_text_chr(wmem_allocator_t *allocator, const char *string, size_t len, char chr)
1303{
1304 wmem_strbuf_t *buf;
1305
1306 buf = wmem_strbuf_new_sized(allocator, len + 1);
1307 for (const char *p = string; p < string + len; p++) {
1308 if (g_ascii_isprint(*p)((g_ascii_table[(guchar) (*p)] & G_ASCII_PRINT) != 0)) {
1309 wmem_strbuf_append_c(buf, *p);
1310 }
1311 else if (g_ascii_isspace(*p)((g_ascii_table[(guchar) (*p)] & G_ASCII_SPACE) != 0)) {
1312 wmem_strbuf_append_c(buf, ' ');
1313 }
1314 else {
1315 wmem_strbuf_append_c(buf, chr);
1316 }
1317 }
1318 return wmem_strbuf_finalize(buf);
1319}
1320
1321char *
1322format_char(wmem_allocator_t *allocator, char c)
1323{
1324 char *buf;
1325 char r;
1326
1327 if (g_ascii_isprint(c)((g_ascii_table[(guchar) (c)] & G_ASCII_PRINT) != 0)) {
1328 buf = wmem_alloc_array(allocator, char, 2)((char*)wmem_alloc((allocator), (((((2)) <= 0) || ((size_t
)sizeof(char) > (9223372036854775807L / (size_t)((2))))) ?
0 : (sizeof(char) * ((2))))))
;
1329 buf[0] = c;
1330 buf[1] = '\0';
1331 return buf;
1332 }
1333 if (escape_char(c, &r)) {
1334 buf = wmem_alloc_array(allocator, char, 3)((char*)wmem_alloc((allocator), (((((3)) <= 0) || ((size_t
)sizeof(char) > (9223372036854775807L / (size_t)((3))))) ?
0 : (sizeof(char) * ((3))))))
;
1335 buf[0] = '\\';
1336 buf[1] = r;
1337 buf[2] = '\0';
1338 return buf;
1339 }
1340 buf = wmem_alloc_array(allocator, char, 5)((char*)wmem_alloc((allocator), (((((5)) <= 0) || ((size_t
)sizeof(char) > (9223372036854775807L / (size_t)((5))))) ?
0 : (sizeof(char) * ((5))))))
;
1341 buf[0] = '\\';
1342 buf[1] = 'x';
1343 buf[2] = hex[((uint8_t)c >> 4) & 0xF];
1344 buf[3] = hex[((uint8_t)c >> 0) & 0xF];
1345 buf[4] = '\0';
1346 return buf;
1347}
1348
1349char*
1350ws_utf8_truncate(char *string, size_t len)
1351{
1352 char* last_char;
1353
1354 /* Ensure that it is null terminated */
1355 string[len] = '\0';
1356 last_char = g_utf8_find_prev_char(string, string + len);
1357 if (last_char != NULL((void*)0) && g_utf8_get_char_validated(last_char, -1) == (gunichar)-2) {
1358 /* The last UTF-8 character was truncated into a partial sequence. */
1359 *last_char = '\0';
1360 }
1361 return string;
1362}
1363
1364/* ASCII/EBCDIC conversion tables from
1365 * https://web.archive.org/web/20060813174742/http://www.room42.com/store/computer_center/code_tables.shtml
1366 */
1367#if 0
1368static const uint8_t ASCII_translate_EBCDIC [ 256 ] = {
1369 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
1370 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
1371 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
1372 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
1373 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D,
1374 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
1375 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
1376 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
1377 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8,
1378 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
1379 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
1380 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
1381 0x7D, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
1382 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
1383 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
1384 0xA8, 0xA9, 0xC0, 0x6A, 0xD0, 0xA1, 0x4B,
1385 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1386 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1387 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1388 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1389 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1390 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1391 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1392 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1393 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1394 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1395 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1396 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1397 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1398 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1399 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
1400 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B
1401};
1402
1403void
1404ASCII_to_EBCDIC(uint8_t *buf, unsigned bytes)
1405{
1406 unsigned i;
1407 uint8_t *bufptr;
1408
1409 bufptr = buf;
1410
1411 for (i = 0; i < bytes; i++, bufptr++) {
1412 *bufptr = ASCII_translate_EBCDIC[*bufptr];
1413 }
1414}
1415
1416uint8_t
1417ASCII_to_EBCDIC1(uint8_t c)
1418{
1419 return ASCII_translate_EBCDIC[c];
1420}
1421#endif
1422
/*
 * Translation table indexed by an EBCDIC byte value; each entry is the
 * byte that EBCDIC_to_ASCII() and EBCDIC_to_ASCII1() substitute for it.
 * Many entries are 0x2E (ASCII '.').
 */
static const uint8_t EBCDIC_translate_ASCII[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00 - 0x07 */
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, /* 0x08 - 0x0F */
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10 - 0x17 */
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, /* 0x18 - 0x1F */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20 - 0x27 */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x28 - 0x2F */
    0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 - 0x37 */
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F, /* 0x38 - 0x3F */
    0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x40 - 0x47 */
    0x2E, 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, /* 0x48 - 0x4F */
    0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x50 - 0x57 */
    0x2E, 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E, /* 0x58 - 0x5F */
    0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x60 - 0x67 */
    0x2E, 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, /* 0x68 - 0x6F */
    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x70 - 0x77 */
    0x2E, 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, /* 0x78 - 0x7F */
    0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x80 - 0x87 */
    0x68, 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x88 - 0x8F */
    0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, /* 0x90 - 0x97 */
    0x71, 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0x98 - 0x9F */
    0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, /* 0xA0 - 0xA7 */
    0x79, 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E, /* 0xA8 - 0xAF */
    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0xB0 - 0xB7 */
    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E, /* 0xB8 - 0xBF */
    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0xC0 - 0xC7 */
    0x48, 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0xC8 - 0xCF */
    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, /* 0xD0 - 0xD7 */
    0x51, 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0xD8 - 0xDF */
    0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, /* 0xE0 - 0xE7 */
    0x59, 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, /* 0xE8 - 0xEF */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0xF0 - 0xF7 */
    0x38, 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E  /* 0xF8 - 0xFF */
};
1457
1458void
1459EBCDIC_to_ASCII(uint8_t *buf, unsigned bytes)
1460{
1461 unsigned i;
1462 uint8_t *bufptr;
1463
1464 bufptr = buf;
1465
1466 for (i = 0; i < bytes; i++, bufptr++) {
1467 *bufptr = EBCDIC_translate_ASCII[*bufptr];
1468 }
1469}
1470
1471uint8_t
1472EBCDIC_to_ASCII1(uint8_t c)
1473{
1474 return EBCDIC_translate_ASCII[c];
1475}
1476
1477/*
1478 * This routine is based on a routine created by Dan Lasley
1479 * <DLASLEY@PROMUS.com>.
1480 *
1481 * It was modified for Wireshark by Gilbert Ramirez and others.
1482 */
1483
/* Max length of the hex offset printed at the start of a line. */
#define MAX_OFFSET_LEN   8

/* Max number of byte values printed on a line. */
#define BYTES_PER_LINE  16

/*
 * Max number of characters the hex dump takes -
 * 2 digits plus trailing blank for each byte.
 */
#define HEX_DUMP_LEN    (BYTES_PER_LINE*3)

/*
 * Number of characters the data part of a line takes:
 *    3 characters per byte of hex dump,
 *    2 blanks separating hex from ASCII,
 *    2 optional ASCII dump delimiters,
 *    1 character per byte of ASCII dump.
 */
#define DATA_DUMP_LEN   (HEX_DUMP_LEN + 2 + 2 + BYTES_PER_LINE)

/*
 * Number of characters per line:
 *    offset, 2 blanks separating offset from data dump, data dump.
 */
#define MAX_LINE_LEN    (MAX_OFFSET_LEN + 2 + DATA_DUMP_LEN)
1499
1500bool_Bool
1501hex_dump_buffer(bool_Bool (*print_line)(void *, const char *), void *fp,
1502 const unsigned char *cp, unsigned length,
1503 hex_dump_enc encoding,
1504 unsigned ascii_option)
1505{
1506 register unsigned int ad, i, j, k, l;
1507 unsigned char c;
1508 char line[MAX_LINE_LEN(8 + 2 + ((16*3) + 2 + 2 + 16)) + 1];
1509 unsigned int use_digits;
1510
1511 static const char binhex[16] = {
1512 '0', '1', '2', '3', '4', '5', '6', '7',
1513 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
1514
1515 /*
1516 * How many of the leading digits of the offset will we supply?
1517 * We always supply at least 4 digits, but if the maximum offset
1518 * won't fit in 4 digits, we use as many digits as will be needed.
1519 */
1520 if (((length - 1) & 0xF0000000) != 0)
1521 use_digits = 8; /* need all 8 digits */
1522 else if (((length - 1) & 0x0F000000) != 0)
1523 use_digits = 7; /* need 7 digits */
1524 else if (((length - 1) & 0x00F00000) != 0)
1525 use_digits = 6; /* need 6 digits */
1526 else if (((length - 1) & 0x000F0000) != 0)
1527 use_digits = 5; /* need 5 digits */
1528 else
1529 use_digits = 4; /* we'll supply 4 digits */
1530
1531 ad = 0;
1532 i = 0;
1533 j = 0;
1534 k = 0;
1535 while (i < length) {
1536 if ((i & 15) == 0) {
1537 /*
1538 * Start of a new line.
1539 */
1540 j = 0;
1541 l = use_digits;
1542 do {
1543 l--;
1544 c = (ad >> (l*4)) & 0xF;
1545 line[j++] = binhex[c];
1546 } while (l != 0);
1547 line[j++] = ' ';
1548 line[j++] = ' ';
1549 memset(line+j, ' ', DATA_DUMP_LEN((16*3) + 2 + 2 + 16));
1550
1551 /*
1552 * Offset in line of ASCII dump.
1553 */
1554 k = j + HEX_DUMP_LEN(16*3) + 2;
1555 if (ascii_option == HEXDUMP_ASCII_DELIMIT(0x0001U))
1556 line[k++] = '|';
1557 }
1558 c = *cp++;
1559 line[j++] = binhex[c>>4];
1560 line[j++] = binhex[c&0xf];
1561 j++;
1562 if (ascii_option != HEXDUMP_ASCII_EXCLUDE(0x0002U) ) {
1563 if (encoding == HEXDUMP_ENC_EBCDIC) {
1564 c = EBCDIC_to_ASCII1(c);
1565 }
1566 line[k++] = ((c >= ' ') && (c < 0x7f)) ? c : '.';
1567 }
1568 i++;
1569 if (((i & 15) == 0) || (i == length)) {
1570 /*
1571 * We'll be starting a new line, or
1572 * we're finished printing this buffer;
1573 * dump out the line we've constructed,
1574 * and advance the offset.
1575 */
1576 if (ascii_option == HEXDUMP_ASCII_DELIMIT(0x0001U))
1577 line[k++] = '|';
1578 line[k] = '\0';
1579 if (!print_line(fp, line))
1580 return false0;
1581 ad += 16;
1582 }
1583 }
1584 return true1;
1585}
1586
1587/*
1588 * Editor modelines - https://www.wireshark.org/tools/modelines.html
1589 *
1590 * Local variables:
1591 * c-basic-offset: 4
1592 * tab-width: 8
1593 * indent-tabs-mode: nil
1594 * End:
1595 *
1596 * vi: set shiftwidth=4 tabstop=8 expandtab:
1597 * :indentSize=4:tabSize=8:noTabs=true:
1598 */