diff mbox series

[babeltrace,v4,1/2] Fix: bitfield: left shiftundefined behavior

Message ID 20190510174750.13152-1-mathieu.desnoyers@efficios.com
State Superseded, archived
Headers show
Series [babeltrace,v4,1/2] Fix: bitfield: left shiftundefined behavior | expand

Commit Message

Mathieu Desnoyers May 10, 2019, 5:47 p.m. UTC
bitfield.h uses the left shift operator with a left operand which
may be negative. The C99 standard states that shifting a negative
value is undefined.

When building with -Wshift-negative-value, we get this gcc warning:

In file included from /home/smarchi/src/babeltrace/include/babeltrace/ctfser-internal.h:44:0,
                 from /home/smarchi/src/babeltrace/ctfser/ctfser.c:42:
/home/smarchi/src/babeltrace/include/babeltrace/ctfser-internal.h: In function ?bt_ctfser_write_unsigned_int?:
/home/smarchi/src/babeltrace/include/babeltrace/bitfield-internal.h:116:24: error: left shift of negative value [-Werror=shift-negative-value]
   mask = ~((~(type) 0) << (__start % ts));  \
                        ^
/home/smarchi/src/babeltrace/include/babeltrace/bitfield-internal.h:222:2: note: in expansion of macro ?_bt_bitfield_write_le?
  _bt_bitfield_write_le(ptr, type, _start, _length, _v)
  ^~~~~~~~~~~~~~~~~~~~~
/home/smarchi/src/babeltrace/include/babeltrace/ctfser-internal.h:418:3: note: in expansion of macro ?bt_bitfield_write_le?
   bt_bitfield_write_le(mmap_align_addr(ctfser->base_mma) +
   ^~~~~~~~~~~~~~~~~~~~

This boils down to the fact that the expression ~((uint8_t)0) has type
"signed int", which is used as an operand of the left shift.  This is due
to the integer promotion rules of C99 (6.3.3.1):

    If an int can represent all values of the original type, the value is
    converted to an int; otherwise, it is converted to an unsigned int.
    These are called the integer promotions. All other types are unchanged
    by the integer promotions.

We also need to cast the result explicitly into the left hand
side type to deal with:

warning: large integer implicitly truncated to unsigned type [-Woverflow]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
Change-Id: I779a55228c6256188eddc94ced6ceb77ffc8515d
---
Changes since v1:
- Generate compile-time error if the type argument passed to
  _bt_unsigned_cast() is larger than sizeof(uint64_t), this
  allows removing _bt_check_max_64bit,
- Introduce _br_fill_mask, which replaces _bt_bitwise_not,
- Clarify _bt_unsigned_cast comment,
- Expand explanation of the issue within the patch commit message.

Changes since v2:
- Fix unwanted sign extension when generating masks,
- Introduce macro helpers to clarify code:
  - _bt_cast_value_to_unsigned()
  - _bt_cast_value_to_unsigned_type(),
  - _bt_make_mask_complement(),
  - _bt_make_mask().

Changes since v3:
- Fix additional left shift undefined behavior. Identified with
  -fsanitize=undefined.
- Create fallback for right shift implementation-defined behavior
  if the behavior is found not to be as intended. Detect the behavior
  with a preprocessor check.
- Ensure that the compiler represents signed types with two's complement
  with a preprocessor check.
- Add _bt_rshift() and _bt_lshift() shift helpers to take care of
  C99's undefined behaviors related to shifting signed types.
---
 include/babeltrace/bitfield-internal.h | 216 ++++++++++++++++++++++-----------
 1 file changed, 147 insertions(+), 69 deletions(-)
diff mbox series

Patch

diff --git a/include/babeltrace/bitfield-internal.h b/include/babeltrace/bitfield-internal.h
index c5d5eccd..3254cf80 100644
--- a/include/babeltrace/bitfield-internal.h
+++ b/include/babeltrace/bitfield-internal.h
@@ -2,7 +2,7 @@ 
 #define _BABELTRACE_BITFIELD_H
 
 /*
- * Copyright 2010 - Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
+ * Copyright 2010-2019 - Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -27,7 +27,108 @@ 
 #include <babeltrace/compat/limits-internal.h>	/* C99 5.2.4.2 Numerical limits */
 #include <babeltrace/endian-internal.h>	/* Non-standard BIG_ENDIAN, LITTLE_ENDIAN, BYTE_ORDER */
 
-/* We can't shift a int from 32 bit, >> 32 and << 32 on int is undefined */
+/*
+ * This bitfield header requires the compiler representation of signed
+ * integers to be two's complement.
+ */
+#if (-1 != ~0)
+#error "bitfield.h requires the compiler representation of signed integers to be two's complement."
+#endif
+
+#define _bt_is_signed_type(type)	((type) -1 < (type) 0)
+
+/*
+ * Cast value `v` to an unsigned integer of the same size as `v`.
+ */
+#define _bt_cast_value_to_unsigned(v)					\
+	(sizeof(v) == sizeof(uint8_t) ? (uint8_t) (v) :			\
+	sizeof(v) == sizeof(uint16_t) ? (uint16_t) (v) :		\
+	sizeof(v) == sizeof(uint32_t) ? (uint32_t) (v) :		\
+	sizeof(v) == sizeof(uint64_t) ? (uint64_t) (v) :		\
+	sizeof(struct { int f:(sizeof(v) > sizeof(uint64_t) ? -1 : 1); }))
+
+/*
+ * Cast value `v` to an unsigned integer type of the size of type `type`
+ * *without* sign-extension.
+ *
+ * The unsigned cast ensures that we're not shifting a negative value,
+ * which is undefined in C. However, this limits the maximum type size
+ * of `type` to 64-bit. Generate a compile-time error if the size of
+ * `type` is larger than 64-bit.
+ */
+#define _bt_cast_value_to_unsigned_type(type, v)			\
+	(sizeof(type) == sizeof(uint8_t) ?				\
+		(uint8_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(type) == sizeof(uint16_t) ?				\
+		(uint16_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(type) == sizeof(uint32_t) ?				\
+		(uint32_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(type) == sizeof(uint64_t) ?				\
+		(uint64_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(struct { int f:(sizeof(type) > sizeof(uint64_t) ? -1 : 1); }))
+
+/*
+ * _bt_fill_mask evaluates to a "type" integer with all bits set.
+ */
+#define _bt_fill_mask(type)	((type) ~(type) 0)
+
+/*
+ * Left shift a value `v` of `shift` bits.
+ *
+ * The type of `v` can be signed or unsigned integer.
+ * The value of `shift` must be less than the size of `v` (in bits),
+ * otherwise the behavior is undefined.
+ *
+ * According to the C99 standard, left shift of a left hand-side signed
+ * type is undefined if it has a negative value or if the result cannot
+ * be represented in the result type. This bitfield header discards the
+ * bits that are left-shifted beyond the result type representation,
+ * which is the behavior of an unsigned type left shift operation.
+ * Therefore, always perform left shift on an unsigned type.
+ */
+#define _bt_lshift(v, shift)						\
+	((typeof(v)) (_bt_cast_value_to_unsigned(v) << (shift)))
+
+/*
+ * Generate a mask of type `type` with the `length` least significant bits
+ * cleared, and the most significant bits set.
+ */
+#define _bt_make_mask_complement(type, length)				\
+	_bt_lshift(_bt_fill_mask(type), length)
+
+/*
+ * Generate a mask of type `type` with the `length` least significant bits
+ * set, and the most significant bits cleared.
+ */
+#define _bt_make_mask(type, length)					\
+	((type) ~_bt_make_mask_complement(type, length))
+
+/*
+ * Right shift a value `v` of `shift` bits.
+ *
+ * The type of `v` can be signed or unsigned integer.
+ * The value of `shift` must be less than the size of `v` (in bits),
+ * otherwise the behavior is undefined.
+ *
+ * According to the C99 standard, right shift of a left hand-side signed
+ * type which has a negative value is implementation defined. This
+ * bitfield header relies on the right shift implementation carrying the
+ * sign bit. If the compiler implementation has a different behavior,
+ * emulate carrying the sign bit.
+ */
+#if ((-1 >> 1) == -1)
+#define _bt_rshift(v, shift)	((v) >> (shift))
+#else
+#define _bt_rshift(v, shift)						\
+	((typeof(v)) ((_bt_cast_value_to_unsigned(v) >> (shift)) |	\
+		((v) < 0 ? _bt_make_mask_complement(typeof(v),		\
+			sizeof(v) * CHAR_BIT - (shift)) : 0)))
+#endif
+
+/*
+ * Right shift a signed or unsigned integer with `_shift` value being an
+ * arbitrary number of bits.
+ */
 #define _bt_piecewise_rshift(_v, _shift)				\
 ({									\
 	typeof(_v) ___v = (_v);						\
@@ -36,10 +137,14 @@ 
 	unsigned long final = (___shift) % (sizeof(___v) * CHAR_BIT - 1); \
 									\
 	for (; sb; sb--)						\
-		___v >>= sizeof(___v) * CHAR_BIT - 1;			\
-	___v >>= final;							\
+		___v = _bt_rshift(___v, sizeof(___v) * CHAR_BIT - 1);	\
+	___v = _bt_rshift(___v, final);					\
 })
 
+/*
+ * Left shift a signed or unsigned integer with `_shift` value being an
+ * arbitrary number of bits.
+ */
 #define _bt_piecewise_lshift(_v, _shift)				\
 ({									\
 	typeof(_v) ___v = (_v);						\
@@ -48,29 +153,10 @@ 
 	unsigned long final = (___shift) % (sizeof(___v) * CHAR_BIT - 1); \
 									\
 	for (; sb; sb--)						\
-		___v <<= sizeof(___v) * CHAR_BIT - 1;			\
-	___v <<= final;							\
+		___v = _bt_lshift(___v, sizeof(___v) * CHAR_BIT - 1);	\
+	___v = _bt_lshift(___v, final);					\
 })
 
-#define _bt_is_signed_type(type)	((type) -1 < (type) 0)
-
-/*
- * NOTE: The cast to (uint64_t) below ensures that we're not casting a
- * negative value, which is undefined in C. However, this limits the
- * maximum type size of `type` and `v` to 64-bit. The
- * _bt_check_max_64bit() is used to check that the users of this header
- * do not use types with a size greater than 64-bit.
- */
-#define _bt_unsigned_cast(type, v)					\
-({									\
-	(sizeof(v) < sizeof(type)) ?					\
-		((type) (v)) & ((type) (~(~(uint64_t) 0 << (sizeof(v) * CHAR_BIT)))) : \
-		(type) (v);						\
-})
-
-#define _bt_check_max_64bit(type)					\
-	char _max_64bit_assertion[sizeof(type) <= sizeof(uint64_t) ? 1 : -1] __attribute__((unused))
-
 /*
  * bt_bitfield_write - write integer to a bitfield in native endianness
  *
@@ -108,15 +194,15 @@  do {									\
 									\
 	/* Trim v high bits */						\
 	if (__length < sizeof(__v) * CHAR_BIT)				\
-		__v &= ~((~(typeof(__v)) 0) << __length);		\
+		__v &= _bt_make_mask(typeof(__v), __length);		\
 									\
 	/* We can now append v with a simple "or", shift it piece-wise */ \
 	this_unit = start_unit;						\
 	if (start_unit == end_unit - 1) {				\
-		mask = ~((~(type) 0) << (__start % ts));		\
+		mask = _bt_make_mask(type, __start % ts);		\
 		if (end % ts)						\
-			mask |= (~(type) 0) << (end % ts);		\
-		cmask = (type) __v << (__start % ts);			\
+			mask |= _bt_make_mask_complement(type, end % ts); \
+		cmask = _bt_lshift((type) (__v), __start % ts);		\
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -124,8 +210,8 @@  do {									\
 	}								\
 	if (__start % ts) {						\
 		cshift = __start % ts;					\
-		mask = ~((~(type) 0) << cshift);			\
-		cmask = (type) __v << cshift;				\
+		mask = _bt_make_mask(type, cshift);			\
+		cmask = _bt_lshift((type) (__v), cshift);		\
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -139,7 +225,7 @@  do {									\
 		__start += ts;						\
 	}								\
 	if (end % ts) {							\
-		mask = (~(type) 0) << (end % ts);			\
+		mask = _bt_make_mask_complement(type, end % ts);	\
 		cmask = (type) __v;					\
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
@@ -167,15 +253,15 @@  do {									\
 									\
 	/* Trim v high bits */						\
 	if (__length < sizeof(__v) * CHAR_BIT)				\
-		__v &= ~((~(typeof(__v)) 0) << __length);		\
+		__v &= _bt_make_mask(typeof(__v), __length);		\
 									\
 	/* We can now append v with a simple "or", shift it piece-wise */ \
 	this_unit = end_unit - 1;					\
 	if (start_unit == end_unit - 1) {				\
-		mask = ~((~(type) 0) << ((ts - (end % ts)) % ts));	\
+		mask = _bt_make_mask(type, (ts - (end % ts)) % ts);	\
 		if (__start % ts)					\
-			mask |= (~((type) 0)) << (ts - (__start % ts));	\
-		cmask = (type) __v << ((ts - (end % ts)) % ts);		\
+			mask |= _bt_make_mask_complement(type, ts - (__start % ts)); \
+		cmask = _bt_lshift((type) (__v), (ts - (end % ts)) % ts); \
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -183,8 +269,8 @@  do {									\
 	}								\
 	if (end % ts) {							\
 		cshift = end % ts;					\
-		mask = ~((~(type) 0) << (ts - cshift));			\
-		cmask = (type) __v << (ts - cshift);			\
+		mask = _bt_make_mask(type, ts - cshift);		\
+		cmask = _bt_lshift((type) (__v), ts - cshift);		\
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -198,7 +284,7 @@  do {									\
 		end -= ts;						\
 	}								\
 	if (__start % ts) {						\
-		mask = (~(type) 0) << (ts - (__start % ts));		\
+		mask = _bt_make_mask(type, ts - (__start % ts));	\
 		cmask = (type) __v;					\
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
@@ -252,10 +338,6 @@  do {									\
 	unsigned long start_unit, end_unit, this_unit;			\
 	unsigned long end, cshift; /* cshift is "complement shift" */	\
 									\
-	{ _bt_check_max_64bit(type); }					\
-	{ _bt_check_max_64bit(typeof(*_vptr)); }			\
-	{ _bt_check_max_64bit(typeof(*_ptr)); }				\
-									\
 	if (!__length) {						\
 		*__vptr = 0;						\
 		break;							\
@@ -267,47 +349,47 @@  do {									\
 									\
 	this_unit = end_unit - 1;					\
 	if (_bt_is_signed_type(typeof(__v))				\
-	    && (__ptr[this_unit] & ((type) 1 << ((end % ts ? : ts) - 1)))) \
+	    && (__ptr[this_unit] & _bt_lshift((type) 1, (end % ts ? : ts) - 1))) \
 		__v = ~(typeof(__v)) 0;					\
 	else								\
 		__v = 0;						\
 	if (start_unit == end_unit - 1) {				\
 		cmask = __ptr[this_unit];				\
-		cmask >>= (__start % ts);				\
+		cmask = _bt_rshift(cmask, __start % ts);		\
 		if ((end - __start) % ts) {				\
-			mask = ~((~(type) 0) << (end - __start));	\
+			mask = _bt_make_mask(type, end - __start);	\
 			cmask &= mask;					\
 		}							\
 		__v = _bt_piecewise_lshift(__v, end - __start);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		*__vptr = __v;						\
 		break;							\
 	}								\
 	if (end % ts) {							\
 		cshift = end % ts;					\
-		mask = ~((~(type) 0) << cshift);			\
+		mask = _bt_make_mask(type, cshift);			\
 		cmask = __ptr[this_unit];				\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, cshift);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		end -= cshift;						\
 		this_unit--;						\
 	}								\
 	for (; (long) this_unit >= (long) start_unit + 1; this_unit--) { \
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 		end -= ts;						\
 	}								\
 	if (__start % ts) {						\
-		mask = ~((~(type) 0) << (ts - (__start % ts)));		\
+		mask = _bt_make_mask(type, ts - (__start % ts));	\
 		cmask = __ptr[this_unit];				\
-		cmask >>= (__start % ts);				\
+		cmask = _bt_rshift(cmask, __start % ts);		\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, ts - (__start % ts));	\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 	} else {							\
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 	}								\
 	*__vptr = __v;							\
 } while (0)
@@ -323,10 +405,6 @@  do {									\
 	unsigned long start_unit, end_unit, this_unit;			\
 	unsigned long end, cshift; /* cshift is "complement shift" */	\
 									\
-	{ _bt_check_max_64bit(type); }					\
-	{ _bt_check_max_64bit(typeof(*_vptr)); }			\
-	{ _bt_check_max_64bit(typeof(*_ptr)); }				\
-									\
 	if (!__length) {						\
 		*__vptr = 0;						\
 		break;							\
@@ -338,47 +416,47 @@  do {									\
 									\
 	this_unit = start_unit;						\
 	if (_bt_is_signed_type(typeof(__v))				\
-	    && (__ptr[this_unit] & ((type) 1 << (ts - (__start % ts) - 1)))) \
+	    && (__ptr[this_unit] & _bt_lshift((type) 1, ts - (__start % ts) - 1))) \
 		__v = ~(typeof(__v)) 0;					\
 	else								\
 		__v = 0;						\
 	if (start_unit == end_unit - 1) {				\
 		cmask = __ptr[this_unit];				\
-		cmask >>= (ts - (end % ts)) % ts;			\
+		cmask = _bt_rshift(cmask, (ts - (end % ts)) % ts);	\
 		if ((end - __start) % ts) {				\
-			mask = ~((~(type) 0) << (end - __start));	\
+			mask = _bt_make_mask(type, end - __start);	\
 			cmask &= mask;					\
 		}							\
 		__v = _bt_piecewise_lshift(__v, end - __start);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		*__vptr = __v;						\
 		break;							\
 	}								\
 	if (__start % ts) {						\
 		cshift = __start % ts;					\
-		mask = ~((~(type) 0) << (ts - cshift));			\
+		mask = _bt_make_mask(type, ts - cshift);		\
 		cmask = __ptr[this_unit];				\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, ts - cshift);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		__start += ts - cshift;					\
 		this_unit++;						\
 	}								\
 	for (; this_unit < end_unit - 1; this_unit++) {			\
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 		__start += ts;						\
 	}								\
 	if (end % ts) {							\
-		mask = ~((~(type) 0) << (end % ts));			\
+		mask = _bt_make_mask(type, end % ts);			\
 		cmask = __ptr[this_unit];				\
-		cmask >>= ts - (end % ts);				\
+		cmask = _bt_rshift(cmask, ts - (end % ts));		\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, end % ts);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 	} else {							\
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 	}								\
 	*__vptr = __v;							\
 } while (0)