diff mbox series

urcu workqueue thread uses 99% of cpu while workqueue is empty

Message ID 20220616080912.GA246781@localhost.localdomain
State New
Headers show
Series urcu workqueue thread uses 99% of cpu while workqueue is empty | expand

Commit Message

Minlan Wang June 16, 2022, 8:09 a.m. UTC
Hi, Mathieu,
I tried to write a simple program to reproduce this issue, but failed.
The environment we used to produce this issue is a storage cluster.
It has 3 nodes, each with an NVME and several SATA disks, urcu is part of the
storage cluster software.
The storage software is pretty big, 3+ GB in size. I guess it would be hard for you
to build an environment like ours.

Besides, I made some changes to workqueue.c today to debug this issue; here's
the patch I used for debugging:

>From 18f3fea1436e0c999b346b998b7153150b92e62f Mon Sep 17 00:00:00 2001
From: wangminlan <wangminlan at szsandstone.com>
Date: Wed, 15 Jun 2022 14:28:53 +0800
Subject: [PATCH] <urcu> debug workqueue->futex

---
 .../userspace-rcu-0.12.1/src/workqueue.c           | 49 ++++++++++++++++++----
 1 file changed, 42 insertions(+), 7 deletions(-)
diff mbox series

Patch

diff --git a/ceph/thirdpart/ascache/thirdparty/userspace-rcu-0.12.1/src/workqueue.c b/ceph/thirdpart/ascache/thirdparty/userspace-rcu-0.12.1/src/workqueue.c
index 59eb21d..eed1e21 100644
--- a/ceph/thirdpart/ascache/thirdparty/userspace-rcu-0.12.1/src/workqueue.c
+++ b/ceph/thirdpart/ascache/thirdparty/userspace-rcu-0.12.1/src/workqueue.c
@@ -132,12 +132,21 @@  static int set_thread_cpu_affinity(struct urcu_workqueue *workqueue)
 
 static void futex_wait(int32_t *futex)
 {
+	int ret;
+	int err;
 	/* Read condition before read futex */
 	cmm_smp_mb();
-	if (uatomic_read(futex) != -1)
+	if (uatomic_read(futex) != -1) {
+		fprintf(stderr, "%lu: wq %p: %s futex != -1, don't wait\n",
+			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
+
 		return;
-	while (futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
-		switch (errno) {
+	}
+	while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
+		err = errno;
+		fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
+			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, err);
+		switch (err) {
 		case EWOULDBLOCK:
 			/* Value already changed. */
 			return;
@@ -146,17 +155,35 @@  static void futex_wait(int32_t *futex)
 			break;	/* Get out of switch. */
 		default:
 			/* Unexpected error. */
-			urcu_die(errno);
+			urcu_die(err);
 		}
 	}
+
+	fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
+		pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
+
 }
 
 static void futex_wake_up(int32_t *futex)
 {
 	/* Write to condition before reading/writing futex */
+	int32_t old;
+
 	cmm_smp_mb();
-	if (caa_unlikely(uatomic_read(futex) == -1)) {
-		uatomic_set(futex, 0);
+	old = uatomic_read(futex);
+	if (caa_unlikely(old == -1)) {
+		old = uatomic_xchg(futex, 0);
+		if (old == -1) {
+			fprintf(stderr, "%lu: wq %p, wakeup succeed: old %d\n",
+				pthread_self(),
+				caa_container_of(futex, struct urcu_workqueue, futex),
+				old);
+		} else {
+			fprintf(stderr, "%lu: wq %p, wakeup failed: old %d\n",
+				pthread_self(),
+				caa_container_of(futex, struct urcu_workqueue, futex),
+				old);
+		}
 		if (futex_async(futex, FUTEX_WAKE, 1,
 				NULL, NULL, 0) < 0)
 			urcu_die(errno);
@@ -237,8 +264,16 @@  static void *workqueue_thread(void *arg)
 		if (!rt) {
 			if (cds_wfcq_empty(&workqueue->cbs_head,
 					&workqueue->cbs_tail)) {
+				int32_t new;
 				futex_wait(&workqueue->futex);
-				uatomic_dec(&workqueue->futex);
+				new = uatomic_add_return(&workqueue->futex, -1);
+				if (new == -1) {
+					fprintf(stderr, "%lu: wq %p dec succeed: old %d, new %d\n",
+						pthread_self(), workqueue, new + 1, new);
+				} else {
+					fprintf(stderr, "%lu: wq %p dec failed: old %d\n",
+						pthread_self(), workqueue, new + 1);
+				}
 				/*
 				 * Decrement futex before reading
 				 * urcu_work list.