***UNCHECKED*** Re: urcu workqueue thread uses 99% of cpu while workqueue is empty

Message ID 20220622074535.GA269641@localhost.localdomain
State New
Headers
Series ***UNCHECKED*** Re: urcu workqueue thread uses 99% of cpu while workqueue is empty |

Commit Message

Minlan Wang June 22, 2022, 7:45 a.m. UTC
  Hi, Mathieu,

On Tue, Jun 21, 2022 at 09:12:38AM -0400, Mathieu Desnoyers wrote:
> > @@ -0,0 +1,82 @@
> > +#undef	LTTNG_UST_TRACEPOINT_PROVIDER
> > +#define	LTTNG_UST_TRACEPOINT_PROVIVER workqueue
> 
> If you change "PROVIVER" into "PROVIDER" here, does it fix your issue ?
Yes, this did solve the compile problem of the tracepoint patch.
Stupid mistake of mine...
This is the working patch for lttng_ust_tracepoint in workqueue, it is on top
of previous 2 debug patches:

>From 4e25d41b146fa374621582f04f2f9f59163c64a2 Mon Sep 17 00:00:00 2001
From: mlwang <mlwang at szsandstone.com>
Date: Wed, 22 Jun 2022 15:27:41 +0800
Subject: [PATCH] add tracepoint in workqueue.c

---
 src/Makefile.am       |   4 +-
 src/liburcu-cds.pc.in |   2 +-
 src/workqueue.c       |  29 ++++--------
 src/workqueue.h       |   1 +
 src/workqueue_tp.c    |   4 ++
 src/workqueue_tp.h    | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 141 insertions(+), 24 deletions(-)
 create mode 100644 src/workqueue_tp.c
 create mode 100644 src/workqueue_tp.h
  

Comments

Mathieu Desnoyers June 22, 2022, 1:19 p.m. UTC | #1
----- On Jun 22, 2022, at 3:45 AM, Minlan Wang wangminlan at szsandstone.com wrote:

[...]

Hi Minlan,

> --
> 1.8.3.1
> 
> 
> And the lttng session is configured to trace these events:
> kernel: syscall futex

On the kernel side, in addition to the syscall futex, I would really like to see what
happens in the scheduler, mainly the wait/wakeup tracepoints. This can be added by using:

lttng enable-event -k 'sched_*'

This should help us confirm whether we indeed have a situation where queued wake ups
happen to wake up a wait happening only later, which is unexpected by the current liburcu
userspace code.

[...]

> ---
> 
> The babletrace output of this session is pretty big, 6 MB in size, i put it in
> the attachment trace_0622.tar.bz2.
> Let my know if your mailbox can't handle such big attachment.

It would be even better if you can share the binary trace, because then it's easy to
load it in trace compass, cut away time ranges that don't matter, and lots of other
useful stuff.

Thanks,

Mathieu
  

Patch

diff --git a/src/Makefile.am b/src/Makefile.am
index 88ccc1f..5a18e31 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,7 +2,7 @@  AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)
 
 #Add the -version-info directly here since we are only building
 # library that use the version-info
-AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION)
+AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION) -llttng-ust -ldl
 if USE_CYGWIN
 AM_LDFLAGS+=-no-undefined
 endif
@@ -56,7 +56,7 @@  liburcu_bp_la_SOURCES = urcu-bp.c urcu-pointer.c $(COMPAT)
 liburcu_bp_la_LIBADD = liburcu-common.la
 
 liburcu_cds_la_SOURCES = rculfqueue.c rculfstack.c lfstack.c \
-	workqueue.c workqueue.h $(RCULFHASH) $(COMPAT)
+	workqueue.c workqueue.h workqueue_tp.c workqueue_tp.h $(RCULFHASH) $(COMPAT)
 liburcu_cds_la_LIBADD = liburcu-common.la
 
 pkgconfigdir = $(libdir)/pkgconfig
diff --git a/src/liburcu-cds.pc.in b/src/liburcu-cds.pc.in
index e3d13af..f4e5e3e 100644
--- a/src/liburcu-cds.pc.in
+++ b/src/liburcu-cds.pc.in
@@ -7,5 +7,5 @@  Name: Userspace RCU Concurrent Data Structures
 Description: Data structures leveraging RCU and atomic operations to provide efficient concurrency-aware storage
 Version: @PACKAGE_VERSION@
 Requires:
-Libs: -L${libdir} -lurcu-cds
+Libs: -L${libdir} -lurcu-cds -llttng-ust -ldl
 Cflags: -I${includedir} 
diff --git a/src/workqueue.c b/src/workqueue.c
index 19fc89c..88a0f8f 100644
--- a/src/workqueue.c
+++ b/src/workqueue.c
@@ -137,16 +137,13 @@  static void futex_wait(int32_t *futex)
 	/* Read condition before read futex */
 	cmm_smp_mb();
 	if (uatomic_read(futex) != -1) {
-		fprintf(stderr, "%lu: wq %p: %s futex != -1, don't wait\n",
-			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
-
+		lttng_ust_tracepoint(workqueue, futex_no_wait, (long)futex, *futex);
 		return;
 	}
 wait:
 	while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
 		err = errno;
-		fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
-			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, err);
+		lttng_ust_tracepoint(workqueue, futex_wait_err, (long)futex, *futex, err);
 		switch (err) {
 		case EWOULDBLOCK:
 			/* Value already changed. */
@@ -160,16 +157,14 @@  wait:
 		}
 	}
 
-	fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
-		pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
+	lttng_ust_tracepoint(workqueue, futex_wait_return, (long)futex, *futex, ret);
 	/*
 	 * Prior queued wakeups can cause futex wait to return even
 	 * though the futex value is still -1. If this happens, wait
 	 * again.
 	 */
 	if (uatomic_read(futex) == -1) {
-		fprintf(stderr, "%lu: wq %p: %s futex == -1, wait again\n",
-			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
+		lttng_ust_tracepoint(workqueue, futex_wait_again, (long)futex, *futex);
 		goto wait;
 	}
 }
@@ -184,15 +179,9 @@  static void futex_wake_up(int32_t *futex)
 	if (caa_unlikely(old == -1)) {
 		old = uatomic_xchg(futex, 0);
 		if (old == -1) {
-			fprintf(stderr, "%lu: wq %p, wakeup succeed: old %d\n",
-				pthread_self(),
-				caa_container_of(futex, struct urcu_workqueue, futex),
-				old);
+			lttng_ust_tracepoint(workqueue, futex_wake_up_success, (long)futex, old);
 		} else {
-			fprintf(stderr, "%lu: wq %p, wakeup failed: old %d\n",
-				pthread_self(),
-				caa_container_of(futex, struct urcu_workqueue, futex),
-				old);
+			lttng_ust_tracepoint(workqueue, futex_wake_up_fail, (long)futex, old);
 		}
 		if (futex_async(futex, FUTEX_WAKE, 1,
 				NULL, NULL, 0) < 0)
@@ -278,11 +267,9 @@  static void *workqueue_thread(void *arg)
 				futex_wait(&workqueue->futex);
 				new = uatomic_add_return(&workqueue->futex, -1);
 				if (new == -1) {
-					fprintf(stderr, "%lu: wq %p dec succeed: old %d, new %d\n",
-						pthread_self(), workqueue, new + 1, new);
+					lttng_ust_tracepoint(workqueue, futex_dec_success, (long)&workqueue->futex, new+1, new);
 				} else {
-					fprintf(stderr, "%lu: wq %p dec failed: old %d\n",
-						pthread_self(), workqueue, new + 1);
+					lttng_ust_tracepoint(workqueue, futex_dec_fail, (long)&workqueue->futex, new+1, new);
 				}
 				/*
 				 * Decrement futex before reading
diff --git a/src/workqueue.h b/src/workqueue.h
index 52b6973..63931a4 100644
--- a/src/workqueue.h
+++ b/src/workqueue.h
@@ -28,6 +28,7 @@ 
 #include <pthread.h>
 
 #include <urcu/wfcqueue.h>
+#include "workqueue_tp.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/workqueue_tp.c b/src/workqueue_tp.c
new file mode 100644
index 0000000..a5f9586
--- /dev/null
+++ b/src/workqueue_tp.c
@@ -0,0 +1,4 @@ 
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+
+#include "workqueue_tp.h"
diff --git a/src/workqueue_tp.h b/src/workqueue_tp.h
new file mode 100644
index 0000000..b782f61
--- /dev/null
+++ b/src/workqueue_tp.h
@@ -0,0 +1,125 @@ 
+#undef	LTTNG_UST_TRACEPOINT_PROVIDER
+#define	LTTNG_UST_TRACEPOINT_PROVIDER workqueue
+
+#undef	LTTNG_UST_TRACEPOINT_INCLUDE
+#define	LTTNG_UST_TRACEPOINT_INCLUDE "./workqueue_tp.h"
+
+#if !defined(_WORKQUEUE_TP_H) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
+#define	_WORKQUEUE_TP_H
+
+#include <lttng/tracepoint.h>
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_no_wait,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, val, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_err,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, futex_arg,
+		int, err_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, val, futex_arg)
+		lttng_ust_field_integer(int, err, err_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_return,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, futex_arg,
+		int, ret_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, val, futex_arg)
+		lttng_ust_field_integer(int, ret, ret_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_again,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, val, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wake_up_success,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, val, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wake_up_fail,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, val, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_dec_success,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, old_arg,
+		int, new_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, old, old_arg)
+		lttng_ust_field_integer(int, new, new_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_dec_fail,
+	LTTNG_UST_TP_ARGS(
+		long, futex_p_arg,
+		int, old_arg,
+		int, new_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex, futex_p_arg)
+		lttng_ust_field_integer(int, old, old_arg)
+		lttng_ust_field_integer(int, new, new_arg)
+	)
+)
+
+#endif /* _WORKQUEUE_TP_H */
+#include <lttng/tracepoint-event.h>