[lttng-ust] Add ctor/dtor priorities for tracepoints/events
diff mbox series

Message ID 20200711152907.676582-1-olivier.dion@polymtl.ca
State New
Headers show
Series
  • [lttng-ust] Add ctor/dtor priorities for tracepoints/events
Related show

Commit Message

Olivier Dion July 11, 2020, 3:29 p.m. UTC
Some library might want to generate events in their ctor/dtor.  If
LTTng initialize/finalize its tracepoints/events at the wrong time,
events are lost.

Order of execution of the ctor/dtor is determined by priority.  When
some priorities are equal, the order of execution seems to be
determined by:

	   a) Order of appearance if in the same compilation unit

	   b) Order of link if in different compilation units

	   c) Order of load by ld-linux.so or dlopen(3) for
	      share objects

Also, using the constructor/destructor attributes without any priority
will default to the _lowest_ priority 65535, at least for GCC.

Thus, Providing the LTTNG_*_PRIO definitions allows users to set their
ctor/dtor priority like so:
----------------------------------------------------------------------
...
__attribute__((constructor(LTTNG_CTOR_PRIO + 1)))
...
__attribute__((destructor(LTTNG_DTOR_PRIO + 1)))
...
----------------------------------------------------------------------
or without any priority, that would also work.

Note that LTTNG_*_PRIO are set to 101 because it is the _highest_
priority and 0 to 100 are reserved.

Signed-off-by: Olivier Dion <olivier.dion at polymtl.ca>
---
 include/lttng/tracepoint.h           | 8 ++++----
 include/lttng/ust-compiler.h         | 7 +++++++
 include/lttng/ust-tracepoint-event.h | 4 ++--
 3 files changed, 13 insertions(+), 6 deletions(-)

Comments

Mathieu Desnoyers July 12, 2020, 1:49 p.m. UTC | #1
----- On Jul 11, 2020, at 11:29 AM, lttng-dev lttng-dev at lists.lttng.org wrote:

> Some library might want to generate events in their ctor/dtor.  If
> LTTng initialize/finalize its tracepoints/events at the wrong time,
> events are lost.
> 
> Order of execution of the ctor/dtor is determined by priority.  When
> some priorities are equal, the order of execution seems to be
> determined by:
> 
>	   a) Order of appearance if in the same compilation unit
> 
>	   b) Order of link if in different compilation units
> 
>	   c) Order of load by ld-linux.so or dlopen(3) for
>	      share objects

I recall different rules about constructor priorities. Can you provide
links to documentation stating the priority order you describe above ?

Also, we should compare two approaches to fulfill your goal:
one alternative would be to have application/library constructors
explicitly call tracepoint constructors if they wish to use them.

Thanks,

Mathieu

> 
> Also, using the constructor/destructor attributes without any priority
> will default to the _lowest_ priority 65535, at least for GCC.
> 
> Thus, Providing the LTTNG_*_PRIO definitions allows users to set their
> ctor/dtor priority like so:
> ----------------------------------------------------------------------
> ...
> __attribute__((constructor(LTTNG_CTOR_PRIO + 1)))
> ...
> __attribute__((destructor(LTTNG_DTOR_PRIO + 1)))
> ...
> ----------------------------------------------------------------------
> or without any priority, that would also work.
> 
> Note that LTTNG_*_PRIO are set to 101 because it is the _highest_
> priority and 0 to 100 are reserved.
> 
> Signed-off-by: Olivier Dion <olivier.dion at polymtl.ca>
> ---
> include/lttng/tracepoint.h           | 8 ++++----
> include/lttng/ust-compiler.h         | 7 +++++++
> include/lttng/ust-tracepoint-event.h | 4 ++--
> 3 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/include/lttng/tracepoint.h b/include/lttng/tracepoint.h
> index d77a2fb2..70903757 100644
> --- a/include/lttng/tracepoint.h
> +++ b/include/lttng/tracepoint.h
> @@ -318,7 +318,7 @@ __tracepoint__init_urcu_sym(void)
> }
> #endif
> 
> -static void lttng_ust_notrace __attribute__((constructor))
> +static void lttng_ust_notrace __attribute__((constructor(LTTNG_CTOR_PRIO)))
> __tracepoints__init(void);
> static void
> __tracepoints__init(void)
> @@ -340,7 +340,7 @@ __tracepoints__init(void)
> 	__tracepoint__init_urcu_sym();
> }
> 
> -static void lttng_ust_notrace __attribute__((destructor))
> +static void lttng_ust_notrace __attribute__((destructor(LTTNG_DTOR_PRIO)))
> __tracepoints__destroy(void);
> static void
> __tracepoints__destroy(void)
> @@ -444,7 +444,7 @@ extern struct lttng_ust_tracepoint * const
> __stop___tracepoints_ptrs[]
> 		__attribute__((used, section("__tracepoints_ptrs"))) =		\
> 			&__tracepoint_##_provider##___##_name;
> 
> -static void lttng_ust_notrace __attribute__((constructor))
> +static void lttng_ust_notrace __attribute__((constructor(LTTNG_CTOR_PRIO)))
> __tracepoints__ptrs_init(void);
> static void
> __tracepoints__ptrs_init(void)
> @@ -488,7 +488,7 @@ __tracepoints__ptrs_init(void)
> 	}
> }
> 
> -static void lttng_ust_notrace __attribute__((destructor))
> +static void lttng_ust_notrace __attribute__((destructor(LTTNG_DTOR_PRIO)))
> __tracepoints__ptrs_destroy(void);
> static void
> __tracepoints__ptrs_destroy(void)
> diff --git a/include/lttng/ust-compiler.h b/include/lttng/ust-compiler.h
> index 1d04da1a..4f1b1a40 100644
> --- a/include/lttng/ust-compiler.h
> +++ b/include/lttng/ust-compiler.h
> @@ -27,4 +27,11 @@
> #define lttng_ust_notrace __attribute__((no_instrument_function))
> #define LTTNG_PACKED	__attribute__((__packed__))
> 
> +/*
> + * Value in range [0, 100] are reserved.  Thus 101 is the higest
> + * priority that can be used.
> + */
> +#define LTTNG_CTOR_PRIO 101
> +#define LTTNG_DTOR_PRIO 101
> +
> #endif /* _LTTNG_UST_COMPILER_H */
> diff --git a/include/lttng/ust-tracepoint-event.h
> b/include/lttng/ust-tracepoint-event.h
> index 7890c247..cd45ae08 100644
> --- a/include/lttng/ust-tracepoint-event.h
> +++ b/include/lttng/ust-tracepoint-event.h
> @@ -1009,7 +1009,7 @@ static int
> _TP_COMBINE_TOKENS(__probe_register_refcount___, TRACEPOINT_PROVIDER)
> 
> /* Reset all macros within TRACEPOINT_EVENT */
> #include <lttng/ust-tracepoint-event-reset.h>
> -static void lttng_ust_notrace __attribute__((constructor))
> +static void lttng_ust_notrace __attribute__((constructor(LTTNG_CTOR_PRIO)))
> _TP_COMBINE_TOKENS(__lttng_events_init__, TRACEPOINT_PROVIDER)(void);
> static void
> _TP_COMBINE_TOKENS(__lttng_events_init__, TRACEPOINT_PROVIDER)(void)
> @@ -1036,7 +1036,7 @@ _TP_COMBINE_TOKENS(__lttng_events_init__,
> TRACEPOINT_PROVIDER)(void)
> 	}
> }
> 
> -static void lttng_ust_notrace __attribute__((destructor))
> +static void lttng_ust_notrace __attribute__((destructor(LTTNG_DTOR_PRIO)))
> _TP_COMBINE_TOKENS(__lttng_events_exit__, TRACEPOINT_PROVIDER)(void);
> static void
> _TP_COMBINE_TOKENS(__lttng_events_exit__, TRACEPOINT_PROVIDER)(void)
> --
> 2.27.0
> 
> _______________________________________________
> lttng-dev mailing list
> lttng-dev at lists.lttng.org
> https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
Olivier Dion July 12, 2020, 3:49 p.m. UTC | #2
On Sun, 12 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
> ----- On Jul 11, 2020, at 11:29 AM, lttng-dev lttng-dev at lists.lttng.org wrote:
>
>> Some library might want to generate events in their ctor/dtor.  If
>> LTTng initialize/finalize its tracepoints/events at the wrong time,
>> events are lost.
>> 
>> Order of execution of the ctor/dtor is determined by priority.  When
>> some priorities are equal, the order of execution seems to be
>> determined by:
>> 
>>	   a) Order of appearance if in the same compilation unit
>> 
>>	   b) Order of link if in different compilation units
>> 
>>	   c) Order of load by ld-linux.so or dlopen(3) for
>>	      share objects
>
> I recall different rules about constructor priorities. Can you provide
> links to documentation stating the priority order you describe above ?

I haven't found any documentation on that.  This is purely empirical.
Although I'm sure that we can dig something if chatting on GCC's IRC.

> Also, we should compare two approaches to fulfill your goal:
> one alternative would be to have application/library constructors
> explicitly call tracepoint constructors if they wish to use them.

I would prefer this way.  The former solution might not work in some
cases (e.g. with LD_PRELOAD and priority =101) and I prefer explicit
initialization in that case.

I don't see any cons for the second approach, except making the symbols
table a few bytes larger.  I'll post a patch soon so we can compare and
try to find more documentation on ctor priority.
Mathieu Desnoyers July 13, 2020, 1:24 p.m. UTC | #3
----- On Jul 12, 2020, at 11:49 AM, Olivier Dion olivier.dion at polymtl.ca wrote:

> On Sun, 12 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
>> ----- On Jul 11, 2020, at 11:29 AM, lttng-dev lttng-dev at lists.lttng.org wrote:
>>
>>> Some library might want to generate events in their ctor/dtor.  If
>>> LTTng initialize/finalize its tracepoints/events at the wrong time,
>>> events are lost.
>>> 
>>> Order of execution of the ctor/dtor is determined by priority.  When
>>> some priorities are equal, the order of execution seems to be
>>> determined by:
>>> 
>>>	   a) Order of appearance if in the same compilation unit
>>> 
>>>	   b) Order of link if in different compilation units
>>> 
>>>	   c) Order of load by ld-linux.so or dlopen(3) for
>>>	      share objects
>>
>> I recall different rules about constructor priorities. Can you provide
>> links to documentation stating the priority order you describe above ?
> 
> I haven't found any documentation on that.  This is purely empirical.
> Although I'm sure that we can dig something if chatting on GCC's IRC.

If it is not documented, then I am reluctant on depending on a behavior
which may be what happens today, but may not be the same for past/future
toolchains.

> 
>> Also, we should compare two approaches to fulfill your goal:
>> one alternative would be to have application/library constructors
>> explicitly call tracepoint constructors if they wish to use them.
> 
> I would prefer this way.  The former solution might not work in some
> cases (e.g. with LD_PRELOAD and priority =101) and I prefer explicit
> initialization in that case.
> 
> I don't see any cons for the second approach, except making the symbols
> table a few bytes larger.  I'll post a patch soon so we can compare and
> try to find more documentation on ctor priority.

And users will have to explicitly call the constructor on which they
depend, but I don't see it as a huge burden.

Beware though that there are a few configurations which can be used for
probe providers (see lttng-ust(3)).

Thanks,

Mathieu

> 
> --
> Olivier Dion
> PolyMtl
Olivier Dion July 13, 2020, 3:19 p.m. UTC | #4
On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
> ----- On Jul 12, 2020, at 11:49 AM, Olivier Dion olivier.dion at polymtl.ca wrote:
>
>> On Sun, 12 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
>>> ----- On Jul 11, 2020, at 11:29 AM, lttng-dev lttng-dev at lists.lttng.org wrote:
>>>
>>>> Some library might want to generate events in their ctor/dtor.  If
>>>> LTTng initialize/finalize its tracepoints/events at the wrong time,
>>>> events are lost.
>>>> 
>>>> Order of execution of the ctor/dtor is determined by priority.  When
>>>> some priorities are equal, the order of execution seems to be
>>>> determined by:
>>>> 
>>>>	   a) Order of appearance if in the same compilation unit
>>>> 
>>>>	   b) Order of link if in different compilation units
>>>> 
>>>>	   c) Order of load by ld-linux.so or dlopen(3) for
>>>>	      share objects
>>>
>>> I recall different rules about constructor priorities. Can you provide
>>> links to documentation stating the priority order you describe above ?
>> 
>> I haven't found any documentation on that.  This is purely empirical.
>> Although I'm sure that we can dig something if chatting on GCC's IRC.
>
> If it is not documented, then I am reluctant on depending on a behavior
> which may be what happens today, but may not be the same for past/future
> toolchains.

Agree.

>>> Also, we should compare two approaches to fulfill your goal:
>>> one alternative would be to have application/library constructors
>>> explicitly call tracepoint constructors if they wish to use them.
>> 
>> I would prefer this way.  The former solution might not work in some
>> cases (e.g. with LD_PRELOAD and priority =101) and I prefer explicit
>> initialization in that case.
>> 
>> I don't see any cons for the second approach, except making the symbols
>> table a few bytes larger.  I'll post a patch soon so we can compare and
>> try to find more documentation on ctor priority.
>
> And users will have to explicitly call the constructor on which they
> depend, but I don't see it as a huge burden.

The burden is small indeed.  But users should pay close attention to
release the references in a destructor too.

> Beware though that there are a few configurations which can be used for
> probe providers (see lttng-ust(3)).

I'm not following you here.  I don't see any configuration for provider
except TRACEPOINT_LOGLEVEL.  What should I be aware of?
Mathieu Desnoyers July 13, 2020, 3:28 p.m. UTC | #5
----- On Jul 13, 2020, at 11:19 AM, Olivier Dion olivier.dion at polymtl.ca wrote:

> On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
[...]
> 
>>>> Also, we should compare two approaches to fulfill your goal:
>>>> one alternative would be to have application/library constructors
>>>> explicitly call tracepoint constructors if they wish to use them.
>>> 
>>> I would prefer this way.  The former solution might not work in some
>>> cases (e.g. with LD_PRELOAD and priority =101) and I prefer explicit
>>> initialization in that case.
>>> 
>>> I don't see any cons for the second approach, except making the symbols
>>> table a few bytes larger.  I'll post a patch soon so we can compare and
>>> try to find more documentation on ctor priority.
>>
>> And users will have to explicitly call the constructor on which they
>> depend, but I don't see it as a huge burden.
> 
> The burden is small indeed.  But users should pay close attention to
> release the references in a destructor too.
> 
>> Beware though that there are a few configurations which can be used for
>> probe providers (see lttng-ust(3)).
> 
> I'm not following you here.  I don't see any configuration for provider
> except TRACEPOINT_LOGLEVEL.  What should I be aware of?

See sections "Statically linking the tracepoint provider" and
"Dynamically loading the tracepoint provider" from lttng-ust(3). It's
especially the dynamic loading I am concerned about, because then it
becomes tricky for an instrumented .so (or app) to call the probe provider's
constructor without dlopening it beforehand, because there are no dependencies
from the instrumented module on probe symbols. And given you plan to call
this from a constructor, it means the dynamic loader lock is already held,
so even if we dlopen the probe provider from the instrumented constructor,
I am not sure the dlopen'd .so's constructor will be allowed to run
immediately.

Maybe one thing that could work for the dynamic loading case would be to:

- let the instrumented constructor dlopen its probe,
- from the instrumented constructor, use dlsym to get the probe's constructor
  symbols.
- call those constructors.

If this is common enough, maybe we would want to provide helpers for this.

Thanks,

Mathieu
Olivier Dion July 13, 2020, 6:46 p.m. UTC | #6
On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
> ----- On Jul 13, 2020, at 11:19 AM, Olivier Dion olivier.dion at polymtl.ca wrote:
>
>> On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
> [...]
>> 
>>>>> Also, we should compare two approaches to fulfill your goal:
>>>>> one alternative would be to have application/library constructors
>>>>> explicitly call tracepoint constructors if they wish to use them.
>>>> 
>>>> I would prefer this way.  The former solution might not work in some
>>>> cases (e.g. with LD_PRELOAD and priority =101) and I prefer explicit
>>>> initialization in that case.
>>>> 
>>>> I don't see any cons for the second approach, except making the symbols
>>>> table a few bytes larger.  I'll post a patch soon so we can compare and
>>>> try to find more documentation on ctor priority.
>>>
>>> And users will have to explicitly call the constructor on which they
>>> depend, but I don't see it as a huge burden.
>> 
>> The burden is small indeed.  But users should pay close attention to
>> release the references in a destructor too.
>> 
>>> Beware though that there are a few configurations which can be used for
>>> probe providers (see lttng-ust(3)).
>> 
>> I'm not following you here.  I don't see any configuration for provider
>> except TRACEPOINT_LOGLEVEL.  What should I be aware of?
>
> See sections "Statically linking the tracepoint provider" and
> "Dynamically loading the tracepoint provider" from lttng-ust(3). It's
> especially the dynamic loading I am concerned about, because then it
> becomes tricky for an instrumented .so (or app) to call the probe provider's
> constructor without dlopening it beforehand, because there are no dependencies
> from the instrumented module on probe symbols. And given you plan to call
> this from a constructor, it means the dynamic loader lock is already held,
> so even if we dlopen the probe provider from the instrumented constructor,
> I am not sure the dlopen'd .so's constructor will be allowed to run
> immediately.
>
> Maybe one thing that could work for the dynamic loading case would be to:
>
> - let the instrumented constructor dlopen its probe,
> - from the instrumented constructor, use dlsym to get the probe's constructor
>   symbols.
> - call those constructors.
>
> If this is common enough, maybe we would want to provide helpers for
> this.

Okay so to be clear.  __tracepoints__init() should be call first, then
__tracepoints__ptrs_init() and then dlsym(3) on
__lttng_events_init__provider() _if_ TRACEPOINT_PROBE_DYNAMIC_LINKAGE.

Reverse the steps in destructor.

And so would something along these lines work?
--------------------------------------------------------------------------------
#ifdef TRACEPOINT_PROBE_DYNAMIC_LINKAGE

#  define tracepoint_acquire(provider)                           \
        do {                                                     \
                void (*init)(void);                              \
                __tracepoints__init();                           \
                __tracepoints__ptrs_init();                      \
                init = dlsym(RTLD_DEFAULT,                       \
                             "__lttng_events_init__" #provider); \
                if (init) {                                      \
                        init();                                  \
                }                                                \
        } while(0)

#else

#  define tracepoint_acquire(provider)                                 \
        do {                                                           \
                __tracepoint__init();                                  \
                __tracepoints_ptrs_init();                             \
                _TP_COMBINE_TOKENS(__lttng_events_init__, provider)(); \
        } while(0)

#endif
--------------------------------------------------------------------------------

And then:
----------------------------------------------------------------------
#include "my-trace.h"

__attribute__((constructor))
static void my_ctor(void)
{
        tracepoint_acquire(my_provider);
        tracepoint(my_provider, my_event, my_state);
}

__attribute__((destructor))
static void my_ctor(void)
{
        tracepoint_release(my_provider)
}
----------------------------------------------------------------------

Of course, this requires making __tracepoints__* externally visibile.
Mathieu Desnoyers July 13, 2020, 6:58 p.m. UTC | #7
----- On Jul 13, 2020, at 2:46 PM, Olivier Dion olivier.dion at polymtl.ca wrote:

> On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
>> ----- On Jul 13, 2020, at 11:19 AM, Olivier Dion olivier.dion at polymtl.ca wrote:
>>
>>> On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
>> [...]
>>> 
>>>>>> Also, we should compare two approaches to fulfill your goal:
>>>>>> one alternative would be to have application/library constructors
>>>>>> explicitly call tracepoint constructors if they wish to use them.
>>>>> 
>>>>> I would prefer this way.  The former solution might not work in some
>>>>> cases (e.g. with LD_PRELOAD and priority =101) and I prefer explicit
>>>>> initialization in that case.
>>>>> 
>>>>> I don't see any cons for the second approach, except making the symbols
>>>>> table a few bytes larger.  I'll post a patch soon so we can compare and
>>>>> try to find more documentation on ctor priority.
>>>>
>>>> And users will have to explicitly call the constructor on which they
>>>> depend, but I don't see it as a huge burden.
>>> 
>>> The burden is small indeed.  But users should pay close attention to
>>> release the references in a destructor too.
>>> 
>>>> Beware though that there are a few configurations which can be used for
>>>> probe providers (see lttng-ust(3)).
>>> 
>>> I'm not following you here.  I don't see any configuration for provider
>>> except TRACEPOINT_LOGLEVEL.  What should I be aware of?
>>
>> See sections "Statically linking the tracepoint provider" and
>> "Dynamically loading the tracepoint provider" from lttng-ust(3). It's
>> especially the dynamic loading I am concerned about, because then it
>> becomes tricky for an instrumented .so (or app) to call the probe provider's
>> constructor without dlopening it beforehand, because there are no dependencies
>> from the instrumented module on probe symbols. And given you plan to call
>> this from a constructor, it means the dynamic loader lock is already held,
>> so even if we dlopen the probe provider from the instrumented constructor,
>> I am not sure the dlopen'd .so's constructor will be allowed to run
>> immediately.
>>
>> Maybe one thing that could work for the dynamic loading case would be to:
>>
>> - let the instrumented constructor dlopen its probe,
>> - from the instrumented constructor, use dlsym to get the probe's constructor
>>   symbols.
>> - call those constructors.
>>
>> If this is common enough, maybe we would want to provide helpers for
>> this.
> 
> Okay so to be clear.  __tracepoints__init() should be call first, then
> __tracepoints__ptrs_init()

I don't think the order matters. What makes you think otherwise ?

> and then dlsym(3) on
> __lttng_events_init__provider() _if_ TRACEPOINT_PROBE_DYNAMIC_LINKAGE.

Yes.

> 
> Reverse the steps in destructor.
> 
> And so would something along these lines work?
> --------------------------------------------------------------------------------
> #ifdef TRACEPOINT_PROBE_DYNAMIC_LINKAGE
> 
> #  define tracepoint_acquire(provider)                           \
>        do {                                                     \
>                void (*init)(void);                              \
>                __tracepoints__init();                           \
>                __tracepoints__ptrs_init();                      \

Where is the dlopen() done ? What code is responsible for it ?

>                init = dlsym(RTLD_DEFAULT,                       \

This should use the handled returned by dlopen.

>                             "__lttng_events_init__" #provider); \
>                if (init) {                                      \
>                        init();                                  \
>                }                                                \
>        } while(0)
> 

We may want a dlclose on the release (?)

> #else
> 
> #  define tracepoint_acquire(provider)                                 \
>        do {                                                           \
>                __tracepoint__init();                                  \
>                __tracepoints_ptrs_init();                             \
>                _TP_COMBINE_TOKENS(__lttng_events_init__, provider)(); \
>        } while(0)
> 
> #endif
> --------------------------------------------------------------------------------
> 
> And then:
> ----------------------------------------------------------------------
> #include "my-trace.h"
> 
> __attribute__((constructor))
> static void my_ctor(void)
> {
>        tracepoint_acquire(my_provider);
>        tracepoint(my_provider, my_event, my_state);
> }
> 
> __attribute__((destructor))
> static void my_ctor(void)
> {
>        tracepoint_release(my_provider)
> }
> ----------------------------------------------------------------------
> 
> Of course, this requires making __tracepoints__* externally visibile.

Why is that so ?

Thanks,

Mathieu


> 
> --
> Olivier Dion
> PolyMtl
Olivier Dion July 13, 2020, 7:44 p.m. UTC | #8
On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
> ----- On Jul 13, 2020, at 2:46 PM, Olivier Dion olivier.dion at polymtl.ca wrote:
>
>> On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
>>> ----- On Jul 13, 2020, at 11:19 AM, Olivier Dion olivier.dion at polymtl.ca wrote:
>>>
>>>> On Mon, 13 Jul 2020, Mathieu Desnoyers <mathieu.desnoyers at efficios.com> wrote:
>>> [...]
>>>> 
>>>>>>> Also, we should compare two approaches to fulfill your goal:
>>>>>>> one alternative would be to have application/library constructors
>>>>>>> explicitly call tracepoint constructors if they wish to use them.
>>>>>> 
>>>>>> I would prefer this way.  The former solution might not work in some
>>>>>> cases (e.g. with LD_PRELOAD and priority =101) and I prefer explicit
>>>>>> initialization in that case.
>>>>>> 
>>>>>> I don't see any cons for the second approach, except making the symbols
>>>>>> table a few bytes larger.  I'll post a patch soon so we can compare and
>>>>>> try to find more documentation on ctor priority.
>>>>>
>>>>> And users will have to explicitly call the constructor on which they
>>>>> depend, but I don't see it as a huge burden.
>>>> 
>>>> The burden is small indeed.  But users should pay close attention to
>>>> release the references in a destructor too.
>>>> 
>>>>> Beware though that there are a few configurations which can be used for
>>>>> probe providers (see lttng-ust(3)).
>>>> 
>>>> I'm not following you here.  I don't see any configuration for provider
>>>> except TRACEPOINT_LOGLEVEL.  What should I be aware of?
>>>
>>> See sections "Statically linking the tracepoint provider" and
>>> "Dynamically loading the tracepoint provider" from lttng-ust(3). It's
>>> especially the dynamic loading I am concerned about, because then it
>>> becomes tricky for an instrumented .so (or app) to call the probe provider's
>>> constructor without dlopening it beforehand, because there are no dependencies
>>> from the instrumented module on probe symbols. And given you plan to call
>>> this from a constructor, it means the dynamic loader lock is already held,
>>> so even if we dlopen the probe provider from the instrumented constructor,
>>> I am not sure the dlopen'd .so's constructor will be allowed to run
>>> immediately.
>>>
>>> Maybe one thing that could work for the dynamic loading case would be to:
>>>
>>> - let the instrumented constructor dlopen its probe,
>>> - from the instrumented constructor, use dlsym to get the probe's constructor
>>>   symbols.
>>> - call those constructors.
>>>
>>> If this is common enough, maybe we would want to provide helpers for
>>> this.
>> 
>> Okay so to be clear.  __tracepoints__init() should be call first, then
>> __tracepoints__ptrs_init()
>
> I don't think the order matters. What makes you think otherwise ?

I assumed __tracepoints_init() initialized rcu, but apparently __ptrs do
the same and more.

>
>> and then dlsym(3) on
>> __lttng_events_init__provider() _if_ TRACEPOINT_PROBE_DYNAMIC_LINKAGE.
>
> Yes.
>
>> 
>> Reverse the steps in destructor.
>> 
>> And so would something along these lines work?
>> --------------------------------------------------------------------------------
>> #ifdef TRACEPOINT_PROBE_DYNAMIC_LINKAGE
>> 
>> #  define tracepoint_acquire(provider)                           \
>>        do {                                                     \
>>                void (*init)(void);                              \
>>                __tracepoints__init();                           \
>>                __tracepoints__ptrs_init();                      \
>
> Where is the dlopen() done ? What code is responsible for it ?

I assume here that the desired trace provider is part of a share object
that has already been dlopen() before.

Using RTLD_DEFAULT or simply NULL should find the correct symbol in the
executable if the share object that has the trace provider is _already_
loaded in memory.

Otherwise, the macro should be something like
'tracepoint_acquire(provider, so_path)' I guess?  And so this would
indeed require a dlopen() on so_path and so on.

>
>>                init = dlsym(RTLD_DEFAULT,                       \
>
> This should use the handled returned by dlopen.
>
>>                             "__lttng_events_init__" #provider); \
>>                if (init) {                                      \
>>                        init();                                  \
>>                }                                                \
>>        } while(0)
>> 
>
> We may want a dlclose on the release (?)

Yes of course!

>
>> #else
>> 
>> #  define tracepoint_acquire(provider)                                 \
>>        do {                                                           \
>>                __tracepoint__init();                                  \
>>                __tracepoints_ptrs_init();                             \
>>                _TP_COMBINE_TOKENS(__lttng_events_init__, provider)(); \
>>        } while(0)
>> 
>> #endif
>> --------------------------------------------------------------------------------
>> 
>> And then:
>> ----------------------------------------------------------------------
>> #include "my-trace.h"
>> 
>> __attribute__((constructor))
>> static void my_ctor(void)
>> {
>>        tracepoint_acquire(my_provider);
>>        tracepoint(my_provider, my_event, my_state);
>> }
>> 
>> __attribute__((destructor))
>> static void my_ctor(void)
>> {
>>        tracepoint_release(my_provider)
>> }
>> ----------------------------------------------------------------------
>> 
>> Of course, this requires making __tracepoints__* externally visibile.
>
> Why is that so ?

__tracepoints__init() is statically defined in every compilation units
that include the trace header.  So this one doesn't actually need to be
externally visible, my mistake.  Although I don't understand why this
initializer is duplicated across units.

However, __tracepoints__ptrs__init() is statically defined in one
compilation unit, the unit that has defined the TRACEPOINT_DEFINE macro.
So I guess that the pointer tables is unique for every exe/so.  If
that's the case, then this initializer should also be find with dlsym()?

Patch
diff mbox series

diff --git a/include/lttng/tracepoint.h b/include/lttng/tracepoint.h
index d77a2fb2..70903757 100644
--- a/include/lttng/tracepoint.h
+++ b/include/lttng/tracepoint.h
@@ -318,7 +318,7 @@  __tracepoint__init_urcu_sym(void)
 }
 #endif
 
-static void lttng_ust_notrace __attribute__((constructor))
+static void lttng_ust_notrace __attribute__((constructor(LTTNG_CTOR_PRIO)))
 __tracepoints__init(void);
 static void
 __tracepoints__init(void)
@@ -340,7 +340,7 @@  __tracepoints__init(void)
 	__tracepoint__init_urcu_sym();
 }
 
-static void lttng_ust_notrace __attribute__((destructor))
+static void lttng_ust_notrace __attribute__((destructor(LTTNG_DTOR_PRIO)))
 __tracepoints__destroy(void);
 static void
 __tracepoints__destroy(void)
@@ -444,7 +444,7 @@  extern struct lttng_ust_tracepoint * const __stop___tracepoints_ptrs[]
 		__attribute__((used, section("__tracepoints_ptrs"))) =		\
 			&__tracepoint_##_provider##___##_name;
 
-static void lttng_ust_notrace __attribute__((constructor))
+static void lttng_ust_notrace __attribute__((constructor(LTTNG_CTOR_PRIO)))
 __tracepoints__ptrs_init(void);
 static void
 __tracepoints__ptrs_init(void)
@@ -488,7 +488,7 @@  __tracepoints__ptrs_init(void)
 	}
 }
 
-static void lttng_ust_notrace __attribute__((destructor))
+static void lttng_ust_notrace __attribute__((destructor(LTTNG_DTOR_PRIO)))
 __tracepoints__ptrs_destroy(void);
 static void
 __tracepoints__ptrs_destroy(void)
diff --git a/include/lttng/ust-compiler.h b/include/lttng/ust-compiler.h
index 1d04da1a..4f1b1a40 100644
--- a/include/lttng/ust-compiler.h
+++ b/include/lttng/ust-compiler.h
@@ -27,4 +27,11 @@ 
 #define lttng_ust_notrace __attribute__((no_instrument_function))
 #define LTTNG_PACKED	__attribute__((__packed__))
 
+/*
+ * Value in range [0, 100] are reserved.  Thus 101 is the higest
+ * priority that can be used.
+ */
+#define LTTNG_CTOR_PRIO 101
+#define LTTNG_DTOR_PRIO 101
+
 #endif /* _LTTNG_UST_COMPILER_H */
diff --git a/include/lttng/ust-tracepoint-event.h b/include/lttng/ust-tracepoint-event.h
index 7890c247..cd45ae08 100644
--- a/include/lttng/ust-tracepoint-event.h
+++ b/include/lttng/ust-tracepoint-event.h
@@ -1009,7 +1009,7 @@  static int _TP_COMBINE_TOKENS(__probe_register_refcount___, TRACEPOINT_PROVIDER)
 
 /* Reset all macros within TRACEPOINT_EVENT */
 #include <lttng/ust-tracepoint-event-reset.h>
-static void lttng_ust_notrace __attribute__((constructor))
+static void lttng_ust_notrace __attribute__((constructor(LTTNG_CTOR_PRIO)))
 _TP_COMBINE_TOKENS(__lttng_events_init__, TRACEPOINT_PROVIDER)(void);
 static void
 _TP_COMBINE_TOKENS(__lttng_events_init__, TRACEPOINT_PROVIDER)(void)
@@ -1036,7 +1036,7 @@  _TP_COMBINE_TOKENS(__lttng_events_init__, TRACEPOINT_PROVIDER)(void)
 	}
 }
 
-static void lttng_ust_notrace __attribute__((destructor))
+static void lttng_ust_notrace __attribute__((destructor(LTTNG_DTOR_PRIO)))
 _TP_COMBINE_TOKENS(__lttng_events_exit__, TRACEPOINT_PROVIDER)(void);
 static void
 _TP_COMBINE_TOKENS(__lttng_events_exit__, TRACEPOINT_PROVIDER)(void)