rearrange jl_delete_thread to be thread-safe (#56097)
Prior to this, especially on macOS, the gc-safepoint here would cause
the process to segfault as we had already freed the current_task state.
Rearrange this code so that the GC interactions (except for the atomic
store to current_task) are all handled before entering GC safe, and then
signaling the thread is deleted (via setting current_task = NULL,
published by jl_unlock_profile_wr to other threads) is last.
```
ERROR: Exception handler triggered on unmanaged thread.
Process 53827 stopped
* thread #5, stop reason = EXC_BAD_ACCESS (code=2, address=0x100018008)
frame #0: 0x0000000100b74344 libjulia-internal.1.12.0.dylib`jl_delete_thread [inlined] jl_gc_state_set(ptls=0x000000011f8b3200, state='\x02', old_state=<unavailable>) at julia_threads.h:272:9 [opt]
269 assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD);
270 jl_atomic_store_release(&ptls->gc_state, state);
271 if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE)
-> 272 jl_gc_safepoint_(ptls);
273 return old_state;
274 }
275 STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls,
Target 0: (julia) stopped.
(lldb) up
frame #1: 0x0000000100b74320 libjulia-internal.1.12.0.dylib`jl_delete_thread [inlined] jl_gc_state_save_and_set(ptls=0x000000011f8b3200, state='\x02') at julia_threads.h:278:12 [opt]
275 STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls,
276 int8_t state)
277 {
-> 278 return jl_gc_state_set(ptls, state, jl_atomic_load_relaxed(&ptls->gc_state));
279 }
280 #ifdef __clang_gcanalyzer__
281 // these might not be a safepoint (if they are no-op safe=>safe transitions), but we have to assume it could be (statically)
(lldb)
frame #2: 0x0000000100b7431c libjulia-internal.1.12.0.dylib`jl_delete_thread(value=0x000000011f8b3200) at threading.c:537:11 [opt]
534 ptls->root_task = NULL;
535 jl_free_thread_gc_state(ptls);
536 // then park in safe-region
-> 537 (void)jl_gc_safe_enter(ptls);
538 }
```
(test incorporated into https://github.com/JuliaLang/julia/pull/55793)