#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"
#include "ompt-specific.h"
#include "ompd-specific.h"
#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
                                                        int level) {
  kmp_nested_nthreads_t *new_nested_nth =
      (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
          sizeof(kmp_nested_nthreads_t));
  int new_size = level + thr->th.th_set_nested_nth_sz;
  new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
  for (int i = 0; i < level + 1; ++i)
    new_nested_nth->nth[i] = 0;
  for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
    new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
  new_nested_nth->size = new_nested_nth->used = new_size;
  return new_nested_nth;
}
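
/* Return the gtid of the calling thread. Depending on __kmp_gtid_mode this
   uses thread-local data (TDATA), keyed TLS, or an internal search that
   matches the current stack address against the registered stack extents of
   all known threads. */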
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
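
/* As above, but if the calling thread has no gtid yet (a new root thread),
   register it with the runtime first. */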
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
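
/* Sanity check: report a fatal error if this thread's stack overlaps the
   stack of any other registered thread. */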
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;
  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
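
/* Print a line of the storage map to stderr under the stdio bootstrap lock.
   With KMP_PRINT_DATA_PLACEMENT enabled it also reports the memory node of
   each page in the [p1, p2] range. */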
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
                                 (char *)p1 - 1, lastNode);
          } while (p1 <= p2);
        }
      } else {
        __kmp_printf_no_lock("  %p-%p memNode %d\n", p1,
                             (char *)p1 + (page_size - 1),
                             __kmp_get_host_node(p1));
        if (p2 > p1)
          __kmp_printf_no_lock("  %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
      }
    }
  } else
    __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
  va_end(ap);
}
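
/* Emit an "OMP warning" message unless warnings are disabled. */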
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
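
/* Abort the whole process, dumping the debug buffer first if one exists. */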
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }
#if KMP_OS_WINDOWS
  // Prevent deadlock if abort happens during initialization or shutdown.
  __kmp_global.g.g_abort = SIGABRT;
  raise(SIGABRT);
  _exit(3); // In case the signal is ignored, exit anyway.
#else
  __kmp_unregister_library();
  abort();
#endif
  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process
void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
}
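
/* Print the storage map entries for a single thread descriptor and its
   barrier data. */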
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[plain]", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() {
  __kmp_fini_target_mem();
  __kmp_fini_memkind();
}

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
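
/* Windows entry point when the runtime is built as a DLL; ties process and
   thread attach/detach events to runtime initialization and shutdown. */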
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    // lpReserved == NULL when the DLL is unloaded via FreeLibrary();
    // only then is it safe to shut the library down here.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
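
/* __kmp_parallel_deo: wait until it is our turn to enter an ordered
   construct inside a parallel region. */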
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
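
/* __kmp_parallel_dxo: signal the next thread in the team that it may enter
   the ordered construct. */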
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
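
/* Claim a single construct for this thread; returns nonzero if the calling
   thread won the race and should execute the single block. */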
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
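
/* Decide how many threads an upcoming parallel region may actually get,
   honoring dyn-var, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT and the
   current capacity of the threads array. Called while __kmp_forkjoin_lock
   is held. */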
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_DEVICE_THREAD_LIMIT
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is
  // included in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG

  if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  }
  return new_nthreads;
}
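
/* Set up the primary thread and allocate (or reuse, for hot teams) the
   worker threads of a freshly allocated team. */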
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually joined the team yet.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  // Take care of primary thread's task state
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (use_hot_team) {
      KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
      KA_TRACE(
          20,
          ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
           "%p, new task_team %p / team %p\n",
           __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
           team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
           team));

      // Store primary thread's current task state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);

      // Restore primary thread's task state to hot team's state
      // by using thread 1's task state
      if (team->t.t_nproc > 1) {
        KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
                         team->t.t_threads[1]->th.th_task_state == 1);
        KMP_CHECK_UPDATE(master_th->th.th_task_state,
                         team->t.t_threads[1]->th.th_task_state);
      } else {
        master_th->th.th_task_state = 0;
      }
    } else {
      // Store primary thread's current task_state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);
      // Are not using hot team, so set task state to 0.
      master_th->th.th_task_state = 0;
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
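
/* Propagate the primary thread's floating-point control settings (x87
   control word and MXCSR) into the team, and restore them to the hardware
   on demand; no-ops on non-x86 architectures. */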
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime wants to know whether it should restore
    // these values, so keep the flag correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Don't write to this cache-line in the team structure unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
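
/* Run a parallel region that has been serialized, i.e. executed by a team
   of exactly one thread (the calling primary thread). */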
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  if (this_thr->th.th_nt_strict && this_thr->th.th_set_nproc > 1)
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  this_thr->th.th_set_nproc = 0;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    if (this_thr->th.th_team->t.t_nested_nth)
      serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
    else
      serial_team->t.t_nested_nth = &__kmp_nested_nth;
    // Save previous team's task state on serial team structure
    serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (this_thr->th.th_team->t.t_nested_nth)
      nested_nth = this_thr->th.th_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;
    this_thr->th.th_task_team = NULL;
    this_thr->th.th_task_state = 0;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used,
       that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (serial_team->t.t_nested_nth)
      nested_nth = serial_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    /* allocate/push task team stack */
    __kmp_push_task_team_node(this_thr, serial_team);

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
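
/* Helpers that classify a fork: a parallel closely nested inside a teams
   construct, or the fork that creates the outer league of teams. */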
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
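
/* Start of a parallel that is nested inside a teams construct. The team is
   the actual (hot) team; workers are already waiting at the fork barrier. */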
static int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data, void *return_address,
#endif
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // we are in serialized parallel
    __kmpc_serialized_parallel(loc, gtid);
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // need to decrement t_serialized for enquiry functions to work
      // correctly, will restore at join time
      parent_team->t.t_serialized--;
      return TRUE;
    }

#if OMPD_SUPPORT
    parent_team->t.t_pkfn = microtask;
#endif

#if OMPT_SUPPORT
    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // don't use lw_taskteam after linking; content was swapped

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }
#endif

    // need to decrement t_serialized for enquiry functions to work
    // correctly, will restore at join time
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                             ,
                             exit_frame_p
#endif
      );
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // Update the thread limit here so that subsequent uses see the right value.
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }
#endif

  /* Change number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // only can reduce number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using distributed barrier, we need to run this code block
      // even if the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

#if USE_DEBUGGER
  if (__kmp_debugging) { // Let debugger override number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means debugger doesn't want to change num threads
      master_set_numthreads = nth;
    }
  }
#endif

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to correct value for each implicit task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create new stack stitching id before entering fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
static int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
#if OMPT_SUPPORT
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
#endif
                       kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  SimpleVLA<void *> args(argc);
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

#if OMPD_SUPPORT
  master_th->th.th_serial_team->t.t_pkfn = microtask;
#endif

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      // team->t.t_pkfn = microtask;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      // revert change made in __kmpc_serialized_parallel()
      // because initial code in teams should have level=0
      team->t.t_level--;
      // call special invoker for outer "parallel" of teams construct
      invoker(gtid);
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    }
  } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
// don't use lw_taskteam after linking; content was swapped
#endif

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
1936int __kmp_fork_call(
ident_t *loc,
int gtid,
1937 enum fork_context_e call_context,
1938 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1943 int master_this_cons;
1945 kmp_team_t *parent_team;
1946 kmp_info_t *master_th;
1950 int master_set_numthreads;
1951 int task_thread_limit = 0;
1955#if KMP_NESTED_HOT_TEAMS
1956 kmp_hot_team_ptr_t **p_hot_teams;
1959 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1962 KA_TRACE(20, (
"__kmp_fork_call: enter T#%d\n", gtid));
1963 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1966 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1968 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1969 __kmp_stkpadding += (short)((kmp_int64)dummy);
1975 if (!TCR_4(__kmp_init_parallel))
1976 __kmp_parallel_initialize();
1977 __kmp_resume_if_soft_paused();
1982 master_th = __kmp_threads[gtid];
1984 parent_team = master_th->th.th_team;
1985 master_tid = master_th->th.th_info.ds.ds_tid;
1986 master_this_cons = master_th->th.th_local.this_construct;
1987 root = master_th->th.th_root;
1988 master_active = root->r.r_active;
1989 master_set_numthreads = master_th->th.th_set_nproc;
1991 master_th->th.th_current_task->td_icvs.task_thread_limit;
1994 ompt_data_t ompt_parallel_data = ompt_data_none;
1995 ompt_data_t *parent_task_data = NULL;
1996 ompt_frame_t *ompt_frame = NULL;
1997 void *return_address = NULL;
1999 if (ompt_enabled.enabled) {
2000 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
2002 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
2007 __kmp_assign_root_init_mask();
2010 level = parent_team->t.t_level;
2012 active_level = parent_team->t.t_active_level;
2014 teams_level = master_th->th.th_teams_level;
2015#if KMP_NESTED_HOT_TEAMS
2016 p_hot_teams = &master_th->th.th_hot_teams;
2017 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
2018 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
2019 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
2020 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
2022 (*p_hot_teams)[0].hot_team_nth = 1;
2027 if (ompt_enabled.enabled) {
2028 if (ompt_enabled.ompt_callback_parallel_begin) {
2029 int team_size = master_set_numthreads
2030 ? master_set_numthreads
2031 : get__nproc_2(parent_team, master_tid);
2032 int flags = OMPT_INVOKER(call_context) |
2033 ((microtask == (microtask_t)__kmp_teams_master)
2034 ? ompt_parallel_league
2035 : ompt_parallel_team);
2036 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
2037 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
2040 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2044 master_th->th.th_ident = loc;
2047 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
2048 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
2049 call_context, microtask, invoker,
2050 master_set_numthreads, level,
2052 ompt_parallel_data, return_address,
2061 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);
2065 __kmp_is_entering_teams(active_level, level, teams_level, ap);
2066 if ((!enter_teams &&
2067 (parent_team->t.t_active_level >=
2068 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2069 (__kmp_library == library_serial)) {
2070 KC_TRACE(10, (
"__kmp_fork_call: T#%d serializing team\n", gtid));
2073 nthreads = master_set_numthreads
2074 ? master_set_numthreads
2076 : get__nproc_2(parent_team, master_tid);
2079 nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
2086 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2091 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2092 nthreads, enter_teams);
2093 if (nthreads == 1) {
2097 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2101 KMP_DEBUG_ASSERT(nthreads > 0);
2104 master_th->th.th_set_nproc = 0;
2106 if (nthreads == 1) {
2107 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2108 invoker, master_th, parent_team,
2110 &ompt_parallel_data, &return_address,
2118 KF_TRACE(10, (
"__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2119 "curtask=%p, curtask_max_aclevel=%d\n",
2120 parent_team->t.t_active_level, master_th,
2121 master_th->th.th_current_task,
2122 master_th->th.th_current_task->td_icvs.max_active_levels));
2126 master_th->th.th_current_task->td_flags.executing = 0;
2128 if (!master_th->th.th_teams_microtask || level > teams_level) {
2130 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2134 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2135 kmp_nested_nthreads_t *nested_nth = NULL;
2136 if (!master_th->th.th_set_nested_nth &&
2137 (level + 1 < parent_team->t.t_nested_nth->used) &&
2138 (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
2139 nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
2140 }
else if (master_th->th.th_set_nested_nth) {
2141 nested_nth = __kmp_override_nested_nth(master_th, level);
2142 if ((level + 1 < nested_nth->used) &&
2143 (nested_nth->nth[level + 1] != nthreads_icv))
2144 nthreads_icv = nested_nth->nth[level + 1];
2152 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2154 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2155 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2156 proc_bind = proc_bind_false;
2160 if (proc_bind == proc_bind_default) {
2161 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2164 if (master_th->th.th_teams_microtask &&
2165 microtask == (microtask_t)__kmp_teams_master) {
2166 proc_bind = __kmp_teams_proc_bind;
2172 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2173 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2174 master_th->th.th_current_task->td_icvs.proc_bind)) {
2177 if (!master_th->th.th_teams_microtask ||
2178 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2179 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2184 master_th->th.th_set_proc_bind = proc_bind_default;
2186 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2187 kmp_internal_control_t new_icvs;
2188 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2189 new_icvs.next = NULL;
2190 if (nthreads_icv > 0) {
2191 new_icvs.nproc = nthreads_icv;
2193 if (proc_bind_icv != proc_bind_default) {
2194 new_icvs.proc_bind = proc_bind_icv;
2198 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2199 team = __kmp_allocate_team(root, nthreads, nthreads,
2203 proc_bind, &new_icvs,
2204 argc USE_NESTED_HOT_ARG(master_th));
2205 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2206 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2209 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2210 team = __kmp_allocate_team(root, nthreads, nthreads,
2215 &master_th->th.th_current_task->td_icvs,
2216 argc USE_NESTED_HOT_ARG(master_th));
2217 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2218 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2219 &master_th->th.th_current_task->td_icvs);
2222 10, (
"__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set primary thread's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Check if hot team has potentially outdated list, and if so use the
  // updated list
  if (team->t.t_nested_nth &&
      team->t.t_nested_nth != parent_team->t.t_nested_nth) {
    KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
    KMP_INTERNAL_FREE(team->t.t_nested_nth);
    team->t.t_nested_nth = NULL;
  }
  team->t.t_nested_nth = parent_team->t.t_nested_nth;
  if (master_th->th.th_set_nested_nth) {
    kmp_nested_nthreads_t *nested_nth =
        __kmp_override_nested_nth(master_th, level);
    team->t.t_nested_nth = nested_nth;
    KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
    master_th->th.th_set_nested_nth = NULL;
    master_th->th.th_set_nested_nth_sz = 0;
    master_th->th.th_nt_strict = false;
  }

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();
#endif

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else
#endif /* USE_ITT_NOTIFY */
        if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
            __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // create new stack stitching id before entering fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep stack stitching id in the serialized parent_team;
      // current team will be used for parallel inside the teams
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }
#endif /* USE_ITT_BUILD */

  // AC: skip __kmp_internal_fork at teams construct, let only primary
  // threads execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  } // END of timer KMP_fork_call block

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If was beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
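// A minimal sketch of how compiler-generated code reaches the fork/join path
// above: a parallel region is outlined into a microtask and handed to
// __kmpc_fork_call, which funnels into __kmp_fork_call and, on region exit,
// __kmp_join_call. Simplified and illustrative only; real codegen also
// forwards every captured variable through the varargs.
#if 0
void outlined_microtask(kmp_int32 *gtid, kmp_int32 *tid, void *shared) {
  // body of "#pragma omp parallel", executed by each thread in the team
}
void run_region(ident_t *loc, void *shared) {
  __kmpc_fork_call(loc, /*argc=*/1, (kmpc_micro)outlined_microtask, shared);
}
#endif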
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  void *team_microtask = (void *)team->t.t_pkfn;
  // For GOMP interface with serialized parallel, need the
  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
  // and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
  }
#endif

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        // so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      if (fork_context == fork_context_gnu) {
        __ompt_lw_taskteam_unlink(master_th);
      }
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    //     But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
#endif
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
#if USE_ITT_BUILD
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      // if parent_team is active, then the id will be destroyed later on
      // by primary thread of the league of teams
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
#endif
  }

  KMP_MB();

#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined", not
    // both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
  if (!exit_teams) {
    // Restore master thread's partition.
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;
  }
#endif // KMP_AFFINITY_SUPPORTED

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
// AC: We need to leave the team structure intact at the end of parallel
// inside the teams construct, so that at the next parallel same (hot) team
// works, only adjust nesting levels
#if OMPT_SUPPORT
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
#endif
    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore number of threads in the team if needed. This code relies on
    // the proper adjustment of th_teams_size.nth after the fork in
    // __kmp_teams_master on each teams primary thread in the case that
    // __kmp_reserve_threads reduced it.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
#endif

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region
     from the serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();
#endif

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free the team

  /* make sure the following restore of the parent team happens inside the
     critical region; otherwise the old team may be reallocated concurrently
     and debug assertions on the hierarchy may fire spuriously */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Restore primary thread's task state from team structure
    KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
                     team->t.t_primary_task_state == 1);
    master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;

    // Copy the task team from the parent team to the primary thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if KMP_AFFINITY_SUPPORTED
  if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
/* Check whether we should push an internal control record onto the
   serial team stack.  If so, do it.  */
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
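// A minimal standalone sketch of the save-once-per-nesting-level technique
// used above: a snapshot is pushed only when the top of the control stack
// does not already belong to the current serialized nesting level. All names
// here are hypothetical; only the push condition mirrors
// __kmp_save_internal_controls.
#if 0
struct icv_snapshot {
  int serial_nesting_level;
  icv_snapshot *next;
};
static void push_once(icv_snapshot **top, int level) {
  if (*top == nullptr || (*top)->serial_nesting_level != level) {
    icv_snapshot *s = new icv_snapshot; // copy the current ICVs into *s here
    s->serial_nesting_level = level;
    s->next = *top;
    *top = s;
  }
}
#endif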
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
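// Usage sketch: the routine above is the backing for omp_set_num_threads().
// A user-level view of the observable behavior (standard OpenMP API only):
#if 0
#include <omp.h>
int main(void) {
  omp_set_num_threads(4); // updates the nproc ICV; may shrink the hot team now
#pragma omp parallel
  {
    // team of at most 4 threads
  }
  return 0;
}
#endif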
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting will be
    // used. A warning will be issued (if warnings are allowed as controlled by
    // the KMP_WARNINGS env var).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range:
    // [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
    // We allow a zero value. (implementation defined behavior)
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, we correct the input to be the
    // upper limit. (implementation defined behavior)
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}

/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}

// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
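// The two build asserts above pin kmp_sched_t and enum sched_type to the size
// of int, so schedule kinds can be passed through int-typed interfaces without
// truncation. A sketch of the same guarantee in standard C++:
#if 0
static_assert(sizeof(kmp_sched_t) == sizeof(int),
              "schedule kinds travel through int-typed interfaces");
static_assert(sizeof(enum sched_type) == sizeof(int),
              "schedule kinds travel through int-typed interfaces");
#endif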
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  //       <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore input chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}

/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
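// Usage sketch: the pair above backs omp_set_schedule()/omp_get_schedule().
// A round trip through the public API (illustrative):
#if 0
#include <omp.h>
#include <stdio.h>
int main(void) {
  omp_set_schedule(omp_sched_dynamic, 8); // lands in __kmp_set_schedule
  omp_sched_t kind;
  int chunk;
  omp_get_schedule(&kind, &chunk); // lands in __kmp_get_schedule
  printf("kind=%d chunk=%d\n", (int)kind, chunk); // expect dynamic, 8
  return 0;
}
#endif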
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise usual algorithm works (will not touch
      // the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
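// A minimal standalone sketch of the walk above: serialized levels are
// consumed first via dd, then the walk climbs t_parent until the requested
// level is reached. Types and names here are hypothetical; only the loop
// shape mirrors __kmp_get_ancestor_thread_num.
#if 0
struct mini_team {
  mini_team *parent;
  int serialized; // serialized nesting depth at this team
  int master_tid; // tid of this team's primary thread in its parent
};
static int ancestor_tid(mini_team *t, int ii, int level) {
  int dd = t->serialized;
  level++; // ancestor at 'level' is the master of the team one level deeper
  while (ii > level) {
    for (dd = t->serialized; dd > 0 && ii > level; dd--, ii--) {
    }
    if (t->serialized && !dd) {
      t = t->parent;
      continue;
    }
    if (ii > level) {
      t = t->parent;
      dd = t->serialized;
      ii--;
    }
  }
  return (dd > 1) ? 0 : t->master_tid;
}
#endif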
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise usual algorithm works (will not touch
      // the teams)
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, we need to artificially
      // increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}

kmp_r_sched_t __kmp_get_schedule_global() {
  // This routine created because pairs (__kmp_sched, __kmp_chunk) and
  // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults
  // independently. So one can get the updated schedule here.

  kmp_r_sched_t r_sched;

  // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,
  // __kmp_guided. __kmp_sched should keep original value, so that user can set
  // KMP_SCHEDULE multiple times, and thus have different run-time schedules in
  // different roots (even in OMP 2.5)
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
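// A minimal standalone sketch of the resolution above: the generic STATIC and
// GUIDED kinds are replaced by the more detailed variants selected at startup,
// and an unset chunk falls back to the default. Hypothetical names; only the
// selection structure mirrors __kmp_get_schedule_global.
#if 0
struct run_sched {
  int type;
  int chunk;
};
static run_sched resolve(int s, int chunk, int detailed_static,
                         int detailed_guided, int generic_static,
                         int generic_guided, int dflt_chunk) {
  run_sched r;
  r.type = (s == generic_static)   ? detailed_static
           : (s == generic_guided) ? detailed_guided
                                   : s;
  r.chunk = (chunk < dflt_chunk) ? dflt_chunk : chunk;
  return r;
}
#endif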
/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
   at least argc number of *t_argv entries for the requested team. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}

static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}

static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
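// A minimal standalone sketch of the grow-and-copy strategy above: allocate
// fresh storage at the new capacity, copy only the live prefix, release the
// old block. Hypothetical helper; only the copy-the-prefix idea mirrors
// __kmp_reallocate_team_arrays.
#if 0
#include <cstring>
template <typename T> T *grow_copy(T *old_arr, int live, int new_cap) {
  T *fresh = new T[new_cap](); // zero-initialized, as __kmp_allocate memory is
  std::memcpy(fresh, old_arr, live * sizeof(T)); // T is a pointer type here
  delete[] old_arr;
  return fresh;
}
#endif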
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level; // corresponds to value of
      // th_team_serialized
      (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
      // adjustment of threads (per thread)
      (kmp_int8)__kmp_env_blocktime, // int bt_set; // internal control for
      // whether blocktime is explicitly set
      __kmp_dflt_blocktime, // int blocktime; // internal control for blocktime
#if KMP_USE_MONITOR
      __kmp_bt_intervals, // int bt_intervals; // internal control for blocktime
// intervals
#endif
      __kmp_dflt_team_nth, // int nproc; // internal control for # of threads
      // for next parallel region (per thread)
      // (use a max ub on value if __kmp_parallel_initialize not called yet)
      __kmp_cg_max_nth, // int thread_limit;
      __kmp_task_max_nth, // int task_thread_limit; // to set the thread_limit
      // on task. This is used in the case of target thread_limit
      __kmp_dflt_max_active_levels, // int max_active_levels; // internal
      // control for max_active_levels
      r_sched, // kmp_r_sched_t sched; // internal control for runtime schedule
      // {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0],
      __kmp_default_device,
      NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serial like in save_inter_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}

static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  /* allocate the root team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  root_team->t.t_nested_nth = &__kmp_nested_nth;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  /* allocate the hot team structure */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
  hot_team->t.t_nested_nth = &__kmp_nested_nth;
}
#ifdef KMP_DEBUG

typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  // Team id is *not* a key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}

static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      __kmp_printf("%2d", gtid);
      if (__kmp_threads != NULL) {
        __kmp_printf(" %p", __kmp_threads[gtid]);
      }
      if (__kmp_root != NULL) {
        __kmp_printf(" %p", __kmp_root[gtid]);
      }
      __kmp_printf("\n");
    }
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:        %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:     ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:  ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:      %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Primary:      ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:  %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:    %2d\n", thread->th.th_set_nproc);
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread("    Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    In Parallel:  %2d\n",
                     KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    int i;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
    __kmp_printf("    Primary TID:      %2d\n", team->t.t_master_tid);
    __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
    __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf("    Thread %2d:      ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out __kmp_thread_pool and __kmp_team_pool.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}

#endif
//---------------------------------------------------------------------------
//  Stuff for per-thread fast random number generator
//  Table of primes
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};

//---------------------------------------------------------------------------
//  __kmp_get_random: Get a random number using a linear congruential method.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead, returns number
   reclaimed. These should be reused as it is their entries that the caller is
   looking for. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}

/* __kmp_expand_threads: expand __kmp_threads and __kmp_root arrays so that the
   total capacity can accommodate nNeed more threads. Returns the number of
   entries added. All calls should be under __kmp_forkjoin_lock, so resizing
   does not need additional protection even if foreign threads are present. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim array entries for root threads
     that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put old __kmp_threads array on a list. Any ongoing references to the old
  // list will be valid. This list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
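// A minimal standalone sketch of the capacity policy above: double the
// capacity until the required minimum is met, clamping at the system maximum.
// Hypothetical wrapper; the doubling/clamp loop mirrors __kmp_expand_threads.
#if 0
static int next_capacity(int current, int needed, int sys_max) {
  int required = current + needed;
  int cap = current;
  do {
    cap = (cap <= (sys_max >> 1)) ? (cap << 1) : sys_max;
  } while (cap < required);
  return cap;
}
#endif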
/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if we are
   the thread that calls from __kmp_do_serial_initialize() */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* If the initial thread did not invoke the OpenMP RTL yet, and this thread
     is not an initial one, "__kmp_all_nth >= __kmp_threads_capacity" does not
     work as expected: it may be that the only free slot is #0, which is
     reserved for the initial thread. The following code works around this. */
  capacity = __kmp_threads_capacity;
  if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }

  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is included
  // in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }

  /* see if there are too many threads */
  if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
    if (__kmp_tp_cached) {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread),
                  KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                  KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
    } else {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
                  __kmp_msg_null);
    }
  }

  // When hidden helper task is enabled, __kmp_threads is organized as follows:
  // 0: initial thread, also a regular OpenMP thread.
  // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
  // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
  // regular OpenMP threads.
  if (TCR_4(__kmp_init_hidden_helper_threads)) {
    // Find an available thread slot for hidden helper thread. Slots for hidden
    // helper threads start from 1 to __kmp_hidden_helper_threads_num.
    for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
                   gtid <= __kmp_hidden_helper_threads_num;
         gtid++)
      ;
    KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
    KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
                 "hidden helper thread: T#%d\n",
                 gtid));
  } else {
    /* find an available thread slot */
    // Don't reassign the zero slot since we need that to only be used by
    // the initial thread. Slots for hidden helper threads are also skipped.
    if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
      gtid = 0;
    } else {
      for (gtid = __kmp_hidden_helper_threads_num + 1;
           TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
        ;
    }
    KA_TRACE(
        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
    KMP_ASSERT(gtid < __kmp_threads_capacity);
  }

  /* update global accounting */
  __kmp_all_nth++;
  TCW_4(__kmp_nth, __kmp_nth + 1);

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for
  // low numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary            */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* setup this new hierarchy */
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
    KMP_DEBUG_ASSERT(!root->r.r_root_team);
  }

#if KMP_STATS_ENABLED
  // Initialize stats as soon as possible (right after gtid assignment).
  __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
  __kmp_stats_thread_ptr->startLife();
  KMP_SET_THREAD_STATE(SERIAL_REGION);
#endif
  __kmp_initialize_root(root);

  /* setup new root thread structure */
  if (root->r.r_uber_thread) {
    root_thread = root->r.r_uber_thread;
  } else {
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
    if (__kmp_storage_map) {
      __kmp_print_thread_storage_map(root_thread, gtid);
    }
    root_thread->th.th_info.ds.ds_gtid = gtid;
#if OMPT_SUPPORT
    root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
#endif
    root_thread->th.th_root = root;
    if (__kmp_env_consistency_check) {
      root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
    }
#if USE_FAST_MEMORY
    __kmp_initialize_fast_memory(root_thread);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
    KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
    __kmp_initialize_bget(root_thread);
#endif
    __kmp_init_random(root_thread); // Initialize random number generator
  }

  /* setup the serial team held in reserve by the root thread */
  if (!root_thread->th.th_serial_team) {
    kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team = __kmp_allocate_team(
        root, 1, 1,
#if OMPT_SUPPORT
        ompt_data_none, // root parallel id
#endif
        proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(root_thread->th.th_serial_team);
  KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                root_thread->th.th_serial_team));

  /* drop root_thread into place */
  TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);

  root->r.r_root_team->t.t_threads[0] = root_thread;
  root->r.r_hot_team->t.t_threads[0] = root_thread;
  root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
  // AC: the team created in reserve, not for execution (it is unused for now).
  root_thread->th.th_serial_team->t.t_serialized = 0;
  root->r.r_uber_thread = root_thread;

  /* initialize the thread, get it ready to go */
  __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
  TCW_4(__kmp_init_gtid, TRUE);

  /* prepare the primary thread for get_gtid() */
  __kmp_gtid_set_specific(gtid);

#if USE_ITT_BUILD
  __kmp_itt_thread_name(gtid);
#endif /* USE_ITT_BUILD */

#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_create_worker(gtid, root_thread, __kmp_stksize);
  KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);

  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
#endif
    }
  }
  KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
                   KMP_INIT_BARRIER_STATE);

#if KMP_AFFINITY_SUPPORTED
  root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif /* KMP_AFFINITY_SUPPORTED */
  root_thread->th.th_def_allocator = __kmp_def_allocator;
  root_thread->th.th_prev_level = 0;
  root_thread->th.th_prev_num_threads = 1;

  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = root_thread;
  tmp->cg_thread_limit = __kmp_cg_max_nth;
  tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
  tmp->up = NULL;
  root_thread->th.th_cg_roots = tmp;

  __kmp_root_counter++;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {

    kmp_info_t *root_thread = ompt_get_thread();

    ompt_set_thread_state(root_thread, ompt_state_overhead);

    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_initial, __ompt_get_thread_data_internal());
    }
    ompt_data_t *task_data;
    ompt_data_t *parallel_data;
    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                  NULL);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
    }

    ompt_set_thread_state(root_thread, ompt_state_work_serial);
  }
#endif
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}
#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // primary thread is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif

// Resets a root thread and clear its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put root thread into __kmp_thread_pool, so we have to reap it.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* this lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. furthermore, if you
     have the forkjoin lock, you should never try to get the initz lock */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered)) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}

#if KMP_OS_WINDOWS
/* __kmp_forkjoin_lock must be already held.
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
#endif
#if KMP_DEBUG
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
#endif // KMP_DEBUG
/* TODO optimize with one big memclr, take out what isn't needed, split
   responsibility to workers as much as possible, and delay initialization of
   features as much as possible */
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* this_thr->th.th_info.ds.ds_gtid is setup in
     kmp_allocate_thread/create_worker.
     this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  KMP_MB();

  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this will be set when tasking code is exited in wait
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
  this_thr->th.th_root = master->th.th_root;

  /* setup the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  /* TODO no worksharing in speculative threads */
  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  if (this_thr != master && // Primary thread's CG root is initialized elsewhere
      this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
    // Make new thread's CG root same as primary thread's
    KMP_DEBUG_ASSERT(master->th.th_cg_roots);
    kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
    if (tmp) {
      // worker changes CG, need to check if old CG should be freed
      int i = tmp->cg_nthreads--;
      KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                     " on node %p of thread %p to %d\n",
                     this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
      if (i == 1) {
        __kmp_free(tmp); // last thread left CG --> free it
      }
    }
    this_thr->th.th_cg_roots = master->th.th_cg_roots;
    // Increment new thread's CG root's counter to add the new thread
    this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
    this_thr->th.th_current_task->td_icvs.thread_limit =
        this_thr->th.th_cg_roots->cg_thread_limit;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc since this will never change for the team.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
    dispatch->th_doacross_buf_idx = 0;
    if (!dispatch->th_disp_buffer) {
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size,
            "th_%d.th_dispatch.th_disp_buffer "
            "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED     */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_MB();
}
/* allocate a new thread for the requesting team. this is only called from
   within a forkjoin critical section. we will first try to get an available
   thread from the thread pool. if none is available, we will fork a new one
   assuming we are able to create a new one. this should be assured, as the
   caller should check on this first. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool unless allocating thread is
   * the main hidden helper thread */
  if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    __kmp_lock_suspend_mx(new_thr);
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    new_thr->th.th_task_state = 0;

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure pool thread has transitioned to waiting on own thread struct
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
      // Thread activated in __kmp_allocate_team when increasing team size
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary */
    /* Middle initialization might not have occurred yet */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_DEBUG
    // If thread entered pool via __kmp_free_thread, wait_flag should !=
    // KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#endif

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

#if KMP_USE_MONITOR
  // If this is the first worker thread the RTL is creating, then also
  // launch the monitor thread. We try to do this as early as possible.
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
#if KMP_OS_WINDOWS
      // AC: wait until monitor has started. This is a fix for CQ232808.
      // If the library is loaded/unloaded in a loop with small (parallel)
      // work in between, the monitor thread may not start before library
      // shutdown, at which point it is too late to cope with the problem.
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
#endif
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }
#endif

  KMP_MB();

  {
    int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                             ? 1
                             : __kmp_hidden_helper_threads_num + 1;

    for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
         ++new_gtid) {
      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
    }

    if (TCR_4(__kmp_init_hidden_helper_threads)) {
      KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
    }
  }

  /* allocate space for it. */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  new_thr->th.th_nt_strict = false;
  new_thr->th.th_nt_loc = NULL;
  new_thr->th.th_nt_sev = severity_fatal;
  new_thr->th.th_nt_msg = NULL;

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  // suppress race conditions detection on synchronization flags in debug mode
  // this helps to analyze library internals eliminating false positives
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            &new_thr->th.th_suspend_init_count,
                            sizeof(new_thr->th.th_suspend_init_count));
#endif
  // TODO: check if we need to also suppress b_arrived flags
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
                            sizeof(new_thr->th.th_bar[0].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
                            sizeof(new_thr->th.th_bar[1].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
                            sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's primary thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                          ompt_data_none, // root parallel id
#endif
                                          proc_bind_default, &r_icvs,
                                          0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
  // execution (it is unused for now).
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

#if USE_FAST_MEMORY
  __kmp_initialize_fast_memory(new_thr);
#endif /* USE_FAST_MEMORY */

#if KMP_USE_BGET
  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);
#endif

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
                __kmp_get_gtid(), KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));

  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
  new_thr->th.th_blocking = false;
#endif

#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  new_thr->th.th_set_nested_nth = NULL;
  new_thr->th.th_set_nested_nth_sz = 0;

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for
  // low numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary       */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_AFFINITY_SUPPORTED
  // Set the affinity of the new thread
  __kmp_affinity_set_init_mask(new_gtid, FALSE);
#endif

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}
4733static void __kmp_reinitialize_team(kmp_team_t *team,
4734 kmp_internal_control_t *new_icvs,
4736 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4737 team->t.t_threads[0], team));
4738 KMP_DEBUG_ASSERT(team && new_icvs);
4739 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4740 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4742 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4744 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4745 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4747 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4748 team->t.t_threads[0], team));
/* Initialize the team data structure.
   This assumes the t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  // TODO???: team->t.t_max_active_levels = new_max_active_levels;
  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
  th->th.th_first_place = first;
  th->th.th_last_place = last;
  th->th.th_new_place = newp;
  if (newp != th->th.th_current_place) {
    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
      team->t.t_display_affinity = 1;
    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
  }
}
// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + primary thread's partition based upon the parent
// thread's partition, and binds each worker to a thread in their partition.
// The primary thread's partition should already include its current binding.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Do not partition places for the hidden helper team
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
  // Copy the primary thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  int num_masks = __kmp_affinity.num_masks;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have the proc_bind policy set to proc_bind_default.
    // Not an issue -- we don't rebind primary thread for any proc_bind policy.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;

  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        __kmp_set_thread_place(team, th, first_place, last_place, place);

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, first_place, last_place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place full; don't add extra
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;

  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != num_masks) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th != NULL);

          int fplace = place, nplace = place;
          s_count = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          __kmp_set_thread_place(team, th, fplace, place, nplace);
          gap_ct++;

          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, num_masks));
        }
      } else {
        /* Having a uniform space of available computation places, create T
           partitions of round(P/T) size and put threads into the first place
           of each partition. */
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
          if (first >= n_places) {
            if (masters_place) {
              first -= n_places;
              last -= n_places;
              if (first == (masters_place + 1)) {
                KMP_DEBUG_ASSERT(f == n_th);
                first--;
              }
              if (last == masters_place) {
                KMP_DEBUG_ASSERT(f == (n_th - 1));
                last--;
              }
            } else {
              KMP_DEBUG_ASSERT(f == n_th);
              first = 0;
              last = 0;
            }
          }
          if (last >= n_places) {
            last = (n_places - 1);
          }
          place = first;
          current += spacing;
          if (f < n_th) {
            KMP_DEBUG_ASSERT(0 <= first);
            KMP_DEBUG_ASSERT(n_places > first);
            KMP_DEBUG_ASSERT(0 <= last);
            KMP_DEBUG_ASSERT(n_places > last);
            KMP_DEBUG_ASSERT(last_place >= first_place);
            th = team->t.t_threads[f];
            KMP_DEBUG_ASSERT(th);
            __kmp_set_thread_place(team, th, first, last, place);
            KA_TRACE(100,
                     ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                      "partition = [%d,%d], spacing = %.4f\n",
                      __kmp_gtid_from_thread(team->t.t_threads[f]),
                      team->t.t_id, f, th->th.th_new_place,
                      th->th.th_first_place, th->th.th_last_place, spacing));
          }
        }
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, place, place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
/* Allocate a new team data structure to use; take one from the team pool if
   available. */
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
  KMP_MB();

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    team = master->th.th_team;
    level = team->t.t_active_level;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1 &&
          ( // #teams > 1
              team->t.t_pkfn ==
                  (microtask_t)__kmp_teams_master || // inner fork of the teams
              master->th.th_teams_level <
                  team->t.t_level)) { // or nested parallel inside the teams
        ++level; // not increment if #teams==1, or for outer fork of the teams
      }
      // Do not perform the place partition if inner fork of the teams
      // Wait until nested parallel region encountered inside teams construct
      if ((master->th.th_teams_size.nteams == 1 &&
           master->th.th_teams_level >= team->t.t_level) ||
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
        do_place_partition = 0;
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level && hot_teams &&
        hot_teams[level].hot_team) {
      // hot team has already been allocated for given level
      use_hot_team = 1;
    } else {
      use_hot_team = 0;
    }
  } else {
    // check we won't access uninitialized hot_teams, just in case
    KMP_DEBUG_ASSERT(new_nproc == 1);
  }
#endif
  // Optimization to use a "hot" team
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
#else
    team = root->r.r_hot_team;
#endif
#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif

    if (team->t.t_nproc != new_nproc &&
        __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier may need a resize
      int old_nthr = team->t.t_nproc;
      __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
    }

    // If not doing the place partition, then reset the team's proc bind
    // to indicate that partitioning of all threads still needs to happen
    if (do_place_partition == 0)
      team->t.t_proc_bind = proc_bind_default;
    // Has the number of threads changed?
    /* Let's assume the most common case is that the number of threads is
       unchanged, and put that case first. */
    if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
      // This case can mean that omp_set_num_threads() was called and the hot
      // team size was already reduced, so we check the special flag
      if (team->t.t_size_changed == -1) {
        team->t.t_size_changed = 1;
      } else {
        KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
      }

      // TODO???: team->t.t_max_active_levels = new_max_active_levels;
      kmp_r_sched_t new_sched = new_icvs->sched;
      // set primary thread's schedule as new run-time schedule
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          if (do_place_partition) {
            // add flag to update only master for spread
            __kmp_partition_places(team, 1);
          }
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        if (do_place_partition) {
          KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
          __kmp_partition_places(team);
        }
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already reduced earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
      // When decreasing team size, threads no longer in the team should
      // unref task team.
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th);
          th->th.th_task_team = NULL;
        }
      }
#if KMP_NESTED_HOT_TEAMS
      if (__kmp_hot_teams_mode == 0) {
        // AC: saved number of threads should correspond to team's value in
        // this mode, can be bigger in mode 1, when hot team has threads in
        // reserve
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
        hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        /* release the extra threads we don't need any more */
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          __kmp_free_thread(team->t.t_threads[f]);
          team->t.t_threads[f] = NULL;
        }
#if KMP_NESTED_HOT_TEAMS
      } // (__kmp_hot_teams_mode == 0)
      else {
        // When keeping extra threads in team, switch threads to wait on own
        // b_go flag
        for (f = new_nproc; f < team->t.t_nproc; ++f) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
            if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
              balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
            }
            KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
          }
        }
      }
#endif // KMP_NESTED_HOT_TEAMS
      team->t.t_nproc = new_nproc;
      // TODO???: team->t.t_max_active_levels = new_max_active_levels;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      // Update remaining threads
      for (f = 0; f < new_nproc; ++f) {
        team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      }

      // restore the current task state of the primary thread: should be the
      // implicit task
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));

      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; f++) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } else { // team->t.t_nproc < new_nproc
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value and use to update only
      team->t.t_size_changed = 1;

#if KMP_NESTED_HOT_TEAMS
      int avail_threads = hot_teams[level].hot_team_nth;
      if (new_nproc < avail_threads)
        avail_threads = new_nproc;
      kmp_info_t **other_threads = team->t.t_threads;
      for (f = team->t.t_nproc; f < avail_threads; ++f) {
        // Adjust barrier data of reserved threads (if any) of the team
        // Other data will be set in __kmp_initialize_info() below.
        int b;
        kmp_balign_t *balign = other_threads[f]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
      if (hot_teams[level].hot_team_nth >= new_nproc) {
        // we have all needed threads in reserve, no need to allocate any
        // this only possible in mode 1, cannot have reserved threads in mode 0
        KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
        team->t.t_nproc = new_nproc; // just get reserved threads involved
      } else {
        // We may have some threads in reserve, but not enough; get reserved
        // threads involved if any.
        team->t.t_nproc = hot_teams[level].hot_team_nth;
        hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
#endif // KMP_NESTED_HOT_TEAMS
        if (team->t.t_max_nproc < new_nproc) {
          /* reallocate larger arrays */
          __kmp_reallocate_team_arrays(team, new_nproc);
          __kmp_reinitialize_team(team, new_icvs, NULL);
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) &&  \
    KMP_AFFINITY_SUPPORTED
        /* Temporarily set full mask for primary thread before creation of
           workers. The reason is that workers inherit the affinity from the
           primary thread, so if a lot of workers are created on the single
           core quickly, they don't get a chance to set their own affinity for
           a long time. */
        kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
#endif

        /* allocate new threads for the hot team */
        for (f = team->t.t_nproc; f < new_nproc; f++) {
          kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
          KMP_DEBUG_ASSERT(new_worker);
          team->t.t_threads[f] = new_worker;

          KA_TRACE(20,
                   ("__kmp_allocate_team: team %d init T#%d arrived: "
                    "join=%llu, plain=%llu\n",
                    team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));

          { // Initialize barrier data for new threads.
            int b;
            kmp_balign_t *balign = new_worker->th.th_bar;
            for (b = 0; b < bs_last_barrier; ++b) {
              balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
              KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
                               KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
              balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
            }
          }
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) &&  \
    KMP_AFFINITY_SUPPORTED
        /* Restore initial primary thread's affinity mask */
        new_temp_affinity.restore();
#endif
#if KMP_NESTED_HOT_TEAMS
      } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already increased earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
      /* make sure everyone is syncronized */
      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

      /* reinitialize the threads */
      KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
      for (f = 0; f < team->t.t_nproc; ++f)
        __kmp_initialize_info(team->t.t_threads[f], team, f,
                              __kmp_gtid_from_tid(f, team));

      // set th_task_state for new threads in hot team with older thread's state
      kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
      for (f = old_nproc; f < team->t.t_nproc; ++f)
        team->t.t_threads[f]->th.th_task_state = old_state;

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } // Check changes in number of threads

    if (master->th.th_teams_microtask) {
      for (f = 1; f < new_nproc; ++f) {
        // propagate teams construct specific info to workers
        kmp_info_t *thr = team->t.t_threads[f];
        thr->th.th_teams_microtask = master->th.th_teams_microtask;
        thr->th.th_teams_level = master->th.th_teams_level;
        thr->th.th_teams_size = master->th.th_teams_size;
      }
    }
#if KMP_NESTED_HOT_TEAMS
    if (level) {
      // Sync barrier state for nested hot teams, not needed for outermost hot
      // team.
      for (f = 1; f < new_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        int b;
        kmp_balign_t *balign = thr->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }
#endif // KMP_NESTED_HOT_TEAMS

    /* reallocate space for arguments if necessary */
    __kmp_alloc_argv_entries(argc, team, TRUE);
    KMP_CHECK_UPDATE(team->t.t_argc, argc);
    // The hot team re-uses the previous task team,
    // if untouched during the previous release->gather phase.

    KF_TRACE(10, (" hot_team = %p\n", team));

#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif

#if OMPT_SUPPORT
    __ompt_team_assign_id(team, ompt_parallel_data);
#endif

    KMP_MB();

    return team;
  }

  /* next, let's try to take one from the team pool */
  KMP_MB();
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    /* TODO: consider resizing undersized teams instead of reaping them, now
       that we have a resizing mechanism */
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      if (max_nproc > 1 &&
          __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        if (!team->t.b) { // Allocate barrier structure
          team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
        }
      }

      /* reinitialize team */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      { // Initialize barrier data.
        int b;
        for (b = 0; b < bs_last_barrier; ++b) {
          team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
          team->t.t_bar[b].b_master_arrived = 0;
          team->t.t_bar[b].b_team_arrived = 0;
#endif
        }
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

#if OMPT_SUPPORT
      __ompt_team_assign_id(team, ompt_parallel_data);
#endif

      team->t.t_nested_nth = NULL;

      KMP_MB();

      return team;
    }

    /* reap team if it is too small, then loop back and check the next one */
    /* TODO: Use technique to find the right size hot-team, don't reap them */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }

  /* nothing available in the pool, no matter, make a new team! */
  KMP_MB();
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  if (max_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    // Allocate barrier structure
    team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
  }

  /* NOTE well, for some reason allocating one big buffer and dividing it up
     seems to really hurt performance a lot on the P4, so, let's not use this */
  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL; // to be removed, as we allocate task team now
  team->t.t_task_team[1] = NULL; // to be removed

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      team->t.t_bar[b].b_master_arrived = 0;
      team->t.t_bar[b].b_team_arrived = 0;
#endif
    }
  }

  team->t.t_proc_bind = new_proc_bind;

#if OMPT_SUPPORT
  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;
#endif

  KMP_MB();

  team->t.t_nested_nth = NULL;

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
/* free the team.  return it to the team pool.  release all the threads
   associated with it */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
#if KMP_DEBUG
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
#endif
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn,
               NULL); // Important for Debugging Support Library
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; // init counter for possible reuse
#endif
  // Do not reset pointer to parent team to NULL for hot teams.

  /* if we are non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach reapable state
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time, check this
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
#endif
          // first check if thread is sleeping
          if (th->th.th_sleep_loc)
            __kmp_null_resume_wrapper(th);
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Before clearing parent pointer, check if nested_nth list should be freed
    if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
        team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
      KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
      KMP_INTERNAL_FREE(team->t.t_nested_nth);
    }
    team->t.t_nested_nth = NULL;

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        (void)KMP_COMPARE_AND_STORE_ACQ32(
            &(team->t.t_threads[f]->th.th_used_in_team), 1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up thread at old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for threads to be removed from team
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // Check if team was created for primary threads in teams construct
    // See if first worker is a CG root
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on workers so that this team can be re-used
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop current CG root off list
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore current task's thread_limit from CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB();
}
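/* __kmp_reap_team() fully destroys a team taken off the pool chain and
   returns the next pool entry, so callers can continue iterating the pool
   while reaping. */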
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* TODO clean the threads that are a part of this? */

  /* free stuff */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
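/* __kmp_free_thread() returns a thread to __kmp_thread_pool without reaping
   it.  The pool is kept sorted by gtid so the gtid <-> OS thread binding
   stays predictable for the affinity machinery; __kmp_thread_pool_insert_pt
   caches the last insertion point to avoid rescanning the list from the
   head.  For example (gtids here are illustrative only), freeing T#5 into a
   pool holding T#2 -> T#4 -> T#7 links it between T#4 and T#7. */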
// Free the thread.  Don't reap it, just place it on the pool of available
// threads.  The free list (__kmp_thread_pool) is kept sorted by gtid.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // Thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // Worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves CG --> free it
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* If the implicit task assigned to this thread can be used by other threads
     -> multiple threads can share the data and try to free the task at
     __kmp_reap_thread at destroy time. Deallocate it now. */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert
  // point, then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread,
  // scanning from __kmp_thread_pool_insert_pt if possible.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  }
#if KMP_DEBUG
  else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
#endif
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}
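/* __kmp_launch_thread() is the top-level routine run by every worker thread:
   it loops on the fork barrier waiting for work, invokes the microtask via
   t_invoke when a team has been assigned, passes through the join barrier,
   and repeats until __kmp_global.g.g_done is set at shutdown. */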
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

#if OMPT_SUPPORT
  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
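/* Shutdown entry points.  __kmp_internal_end_dest() is installed as the TLS
   destructor (the stored value is gtid+1, since 0 is reserved for the
   nothing-stored case); on Unix dynamic builds a destructor function
   forwards to the atexit path, which in turn calls
   __kmp_internal_end_library(-1). */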
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage
   * this is because 0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need release fence here to prevent seg faults for tree forkjoin
           barrier (GEH) */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }

    // Terminate OS thread.
    __kmp_reap_worker(thread);

    // The thread was killed asynchronously.  If it was actively
    // spinning in the thread pool, decrement the global count.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking as well as the other memory allocated in
// each thread
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

  --__kmp_all_nth;
  // __kmp_nth was decremented when the thread was added to the pool.

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();
}
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

#if KMP_OS_WINDOWS
  /* In Win static library, we can't tell when a root actually dies, so we
     reclaim the data structures for any root threads that have died but not
     unregistered themselves, in order to shut down cleanly. */
  __kmp_reclaim_dead_roots();
#endif

  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates.  */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // Other alive roots found; reap only the monitor.
    KMP_MB(); /* Flush all pending memory write invalidates.  */

    // Need to check that monitor was initialized before reaping it. If we are
    // called from __kmp_atfork_child (which sets __kmp_init_monitor=0), the
    // monitor is only valid in the parent process, not the child.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
/* TODO move this to cleanup code */
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?
      }
    }
#endif

    KMP_MB();

    // Reap the worker threads.
    while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams.
    while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

#if KMP_OS_UNIX
    // Threads that are not reaped should not access any resources since they
    // are going to be deallocated soon, so the shutdown sequence should wait
    // until all threads either exit the final spin-waiting loop or begin
    // sleeping after the given blocktime.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }
#endif

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking...
      // Something like KMP_DEBUG_ASSERT(__kmp_thread[i] == NULL);
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

#if KMP_USE_MONITOR
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  }

  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  __kmp_cleanup();
#if OMPT_SUPPORT
  ompt_fini();
#endif
}
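/* __kmp_internal_end_library() and __kmp_internal_end_thread() below follow
   the same shape: bail out early on abort or already-done, tear down the
   hidden helper team, classify the caller by gtid (shutdown / monitor /
   unknown / uber root / worker), and only then take __kmp_initz_lock and
   __kmp_forkjoin_lock and run __kmp_internal_end(). */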
void __kmp_internal_end_library(int gtid_req) {
  /* If we have already cleaned up, don't try again; it wouldn't be pretty.
     This shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init.  We'll check this later too, after
     we get the lock. */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread.  gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
/* worker threads may call this function through the atexit handler, if they
 * call exit() */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // added unregister library call here when we switch to shm linux;
      // if we don't, it will leave lots of files in /dev/shm.
      // cleanup shared memory file before exiting.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();
}
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  // If hidden helper team has been initialized, we need to deinit it
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
      /* we don't know who we are */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread.  gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused)
  // AC: let's not shutdown the dynamic library at the exit of uber thread,
  // because we will better shutdown later in the library destructor.
  {
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root. */

  /* should we finish the run-time?  are all siblings done? */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */

  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
// -----------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
/* On RHEL 3u5 if linked statically, getpid() of the wrapped child process may
   return the same value as the parent process pid, so the name contains an
   extra unique value to avoid the conflict. */
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_name
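/* The registration value has the form "%p-%lx-%s": the address of
   __kmp_registration_flag, the flag's random value, and the library file
   name.  An illustrative value (address and flag value made up) would be
   "0x7f2c8e4b2a10-cafe3a7b-libomp.so".  A second copy of the runtime parses
   this back with __kmp_str_split()/KMP_SSCANF() to decide whether the
   registered owner is still alive. */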
#if defined(KMP_USE_SHM)
bool __kmp_shm_available = false;
bool __kmp_tmp_available = false;
// If /dev/shm is not accessible, we will create a temporary file under /tmp.
char *temp_reg_status_file_name = nullptr;
#endif

void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  union { // For convenience, represent doubled time as a 64-bit value.
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));

  while (!done) {

    char *value = NULL; // Actual value of the environment variable.

#if defined(KMP_USE_SHM)
    char *shm_name = nullptr;
    char *data1 = nullptr;
    __kmp_shm_available = __kmp_detect_shm();
    if (__kmp_shm_available) {
      int fd1 = -1;
      shm_name = __kmp_str_format("/%s", name);
      int shm_preexist = 0;
      fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
      if ((fd1 == -1) && (errno == EEXIST)) {
        // file didn't open because it already exists.
        // try opening existing file
        fd1 = shm_open(shm_name, O_RDWR, 0600);
        if (fd1 == -1) { // file didn't open
          KMP_WARNING(FunctionError, "Can't open SHM");
          __kmp_shm_available = false;
        } else { // able to open existing file
          shm_preexist = 1;
        }
      }
      if (__kmp_shm_available && shm_preexist == 0) { // SHM created, set size
        if (ftruncate(fd1, SHM_SIZE) == -1) { // error occured setting size;
          KMP_WARNING(FunctionError, "Can't set size of SHM");
          __kmp_shm_available = false;
        }
      }
      if (__kmp_shm_available) { // SHM exists, now map it
        data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                             fd1, 0);
        if (data1 == MAP_FAILED) { // failed to map shared memory
          KMP_WARNING(FunctionError, "Can't map SHM");
          __kmp_shm_available = false;
        }
      }
      if (__kmp_shm_available) { // SHM mapped
        if (shm_preexist == 0) { // set data to SHM, set value
          KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
        }
        // Read value from either what we just wrote or existing file.
        value = __kmp_str_format("%s", data1); // read value from SHM
        munmap(data1, SHM_SIZE);
      }
      if (fd1 != -1)
        close(fd1);
    }
    if (!__kmp_shm_available)
      __kmp_tmp_available = __kmp_detect_tmp();
    if (!__kmp_shm_available && __kmp_tmp_available) {
      // SHM failed to work due to an error other than that the file already
      // exists. Try to create a temp file under /tmp.
      // If /tmp isn't accessible, fall back to using environment variable.
      int fd1 = -1;
      temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
      int tmp_preexist = 0;
      fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
      if ((fd1 == -1) && (errno == EEXIST)) {
        // file didn't open because it already exists.
        // try opening existing file
        fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
        if (fd1 == -1) { // file didn't open
          KMP_WARNING(FunctionError, "Can't open TEMP");
          __kmp_tmp_available = false;
        } else {
          tmp_preexist = 1;
        }
      }
      if (__kmp_tmp_available && tmp_preexist == 0) {
        // we created /tmp file now set size
        if (ftruncate(fd1, SHM_SIZE) == -1) { // error occured setting size;
          KMP_WARNING(FunctionError, "Can't set size of /tmp file");
          __kmp_tmp_available = false;
        }
      }
      if (__kmp_tmp_available) {
        data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                             fd1, 0);
        if (data1 == MAP_FAILED) { // failed to map /tmp
          KMP_WARNING(FunctionError, "Can't map /tmp");
          __kmp_tmp_available = false;
        }
      }
      if (__kmp_tmp_available) {
        if (tmp_preexist == 0) { // set data to TMP, set value
          KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
        }
        // Read value from either what we just wrote or existing file.
        value = __kmp_str_format("%s", data1);
        munmap(data1, SHM_SIZE);
      }
      if (fd1 != -1)
        close(fd1);
    }
    if (!__kmp_shm_available && !__kmp_tmp_available) {
      // no /dev/shm and no /tmp -- fall back to environment variable
      // Set environment variable, but do not overwrite if it exists.
      __kmp_env_set(name, __kmp_registration_str, 0);
      // read value to see if it got set
      value = __kmp_env_get(name);
    }
#else // Windows and Unix with static library
    // Set environment variable, but do not overwrite if it exists.
    __kmp_env_set(name, __kmp_registration_str, 0);
    // read value to see if it got set
    value = __kmp_env_get(name);
#endif

    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
      done = 1; // Ok, the value was set successfully; exit the loop.
    } else {
      // Oops. Write failed. Another copy of OpenMP RTL is in memory.
      // Check whether it is alive or dead.
      int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
      char *tail = value;
      char *flag_addr_str = NULL;
      char *flag_val_str = NULL;
      char const *file_name = NULL;
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
      file_name = tail;
      if (tail != NULL) {
        unsigned long *flag_addr = 0;
        unsigned long flag_val = 0;
        KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
        KMP_SSCANF(flag_val_str, "%lx", &flag_val);
        if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
          // First, check whether environment-encoded address is mapped into
          // addr space.
          // If so, dereference it to see if it still has the right value.
          if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
            neighbor = 1;
          } else {
            // If not, then we know the other copy of the library is no longer
            // running.
            neighbor = 2;
          }
        }
      }
      switch (neighbor) {
      case 0: // Cannot parse environment variable -- neighbor status unknown.
        // Assume it is the incompatible format of future version of the
        // library. Assume the other library is alive.
        file_name = "unknown library";
        KMP_FALLTHROUGH();
      // Attention! Falling to the next case. That's intentional.
      case 1: { // Neighbor is alive.
        // Check whether it is allowed.
        char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
        if (!__kmp_str_match_true(duplicate_ok)) {
          // That's not allowed. Issue fatal error.
          __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                      KMP_HNT(DuplicateLibrary), __kmp_msg_null);
        }
        KMP_INTERNAL_FREE(duplicate_ok);
        __kmp_duplicate_library_ok = 1;
        done = 1; // Exit the loop.
      } break;
      case 2: { // Neighbor is dead.

#if defined(KMP_USE_SHM)
        if (__kmp_shm_available) { // close shared memory.
          shm_unlink(shm_name); // this removes file in /dev/shm
        } else if (__kmp_tmp_available) {
          unlink(temp_reg_status_file_name); // this removes the temp file
        } else {
          // Clear the variable and try to register library again.
          __kmp_env_unset(name);
        }
#else
        // Clear the variable and try to register library again.
        __kmp_env_unset(name);
#endif
      } break;
      default: {
        KMP_DEBUG_ASSERT(0);
      } break;
      }
    }
    KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
    if (shm_name)
      KMP_INTERNAL_FREE((void *)shm_name);
#endif
  } // while
  KMP_INTERNAL_FREE((void *)name);

} // func __kmp_register_library_startup
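/* __kmp_unregister_library() re-reads the registration value from the same
   backing store chosen at startup (shared memory, /tmp file, or environment
   variable) and removes it only if it still matches our own
   __kmp_registration_str, so one process cannot unregister another. */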
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = NULL;

#if defined(KMP_USE_SHM)
  char *shm_name = nullptr;
  int fd1;
  if (__kmp_shm_available) {
    shm_name = __kmp_str_format("/%s", name);
    fd1 = shm_open(shm_name, O_RDONLY, 0600);
    if (fd1 != -1) { // file opened successfully
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
      if (data1 != MAP_FAILED) {
        value = __kmp_str_format("%s", data1); // read value from SHM
        munmap(data1, SHM_SIZE);
      }
      close(fd1);
    }
  } else if (__kmp_tmp_available) { // try /tmp
    fd1 = open(temp_reg_status_file_name, O_RDONLY);
    if (fd1 != -1) { // file opened successfully
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
      if (data1 != MAP_FAILED) {
        value = __kmp_str_format("%s", data1); // read value from /tmp
        munmap(data1, SHM_SIZE);
      }
      close(fd1);
    }
  } else { // fall back to environment variable
    value = __kmp_env_get(name);
  }
#else
  value = __kmp_env_get(name);
#endif

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
//  Ok, this is our variable. Delete it.
#if defined(KMP_USE_SHM)
    if (__kmp_shm_available) {
      shm_unlink(shm_name); // this removes file in /dev/shm
    } else if (__kmp_tmp_available) {
      unlink(temp_reg_status_file_name); // this removes the temp file
    } else {
      __kmp_env_unset(name);
    }
#else
    __kmp_env_unset(name);
#endif
  }

#if defined(KMP_USE_SHM)
  if (shm_name)
    KMP_INTERNAL_FREE(shm_name);
  if (temp_reg_status_file_name)
    KMP_INTERNAL_FREE(temp_reg_status_file_name);
#endif

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;

} // __kmp_unregister_library
// End of Library registration stuff.
// -----------------------------------------------------------------

#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() function is available in RHEL7 and SLES12. If a system with an
// earlier OS is used to build the RTL, we'll use the following internal
// function when the entry is not found.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
  // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
  // use them to find if the user-level mwait is enabled. Otherwise, forcibly
  // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
  // KMP_USER_LEVEL_MWAIT was set to TRUE.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */
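/* Runtime initialization is staged: __kmp_do_serial_initialize() sets up
   locks, defaults, and the threads/root tables; middle initialization picks
   the default team size (and initializes affinity); parallel initialization
   finishes the remaining work needed before the first real fork.  Each stage
   runs under __kmp_initz_lock via the wrappers further below. */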
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

  __kmp_validate_locks();

#if ENABLE_LIBOMPTARGET
  /* Initialize functions from libomptarget */
  __kmp_init_omptarget();
#endif

  /* Initialize internal memory allocator */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable or via mapped
     shared memory file and check to see whether another copy of the library is
     already registered. Since forked child process is often terminated, we
     postpone the registration till middle initialization in the child. */
  if (__kmp_need_register_serial)
    __kmp_register_library_startup();

  /* TODO reinitialization of library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);

  /* conduct initialization and initial setup of configuration */

  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

  __kmp_abort_delay = 0;

  // From __kmp_init_dflt_team_nth()
  /* assume the entire machine will be used */
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
  // part
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
  // From "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;

// AC: barrier branch bit defaults for the workers and reduction barriers
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED

// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_init_nesting_mode();

  __kmp_env_initialize(NULL);

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  __kmp_user_level_mwait_init();
#endif
// Print all messages in message catalog for testing purposes
#ifdef KMP_DEBUG
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
#endif

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if pools are not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable sized records */
  /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
   * expandable */
  /* Since allocation is cache-aligned, just add extra padding at the end */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth ==
                   0); // Asserts fail if the library is reinitializing and
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.
  __kmp_all_nth = 0;
  __kmp_nth = 0;

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  /* initialize the data structures needed for threadprivate */
  __kmp_common_initialize();

#if KMP_OS_UNIX
  /* invoke the child fork handler */
  __kmp_register_atfork();
#endif

#if !KMP_DYNAMIC_LIB ||                                                        \
    ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  {
    /* Invoke the exit handler when the program finishes, only for static
       library and macOS* dynamic. For other dynamic libraries, we already
       have _fini and DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  /* NOTE: make sure that this is called before the user installs their own
     signal handlers so that the user handlers are called first. This way they
     can return false, not call our handler, avoid terminating the library, and
     continue execution where they left off. */
  __kmp_install_signals(FALSE);
#endif /* KMP_OS_UNIX */
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif /* KMP_OS_WINDOWS */
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_version) {
    __kmp_print_version_1();
  }

  if (__kmp_settings) {
    __kmp_env_print();
  }

  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

  KMP_MB();

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  if (UNLIKELY(!__kmp_need_register_serial)) {
    // We are in a forked child process. The registration was skipped during
    // serial initialization; do it here.
    __kmp_register_library_startup();
  }

  // Save the previous value for the __kmp_dflt_team_nth so that
  // we can avoid some reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the number of
  // available cores.
  __kmp_affinity_initialize(__kmp_affinity);
#endif // KMP_AFFINITY_SUPPORTED

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  /* There's no harm in continuing if the following check fails,
     but it indicates an error in the previous logic. */
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    /* Run through the __kmp_threads array and set the num threads icv for each
       root thread that is currently registered with the RTL (which has not
       already explicitly set its nthreads-var with a call to
       omp_set_num_threads()). */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary, now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held, so calling __kmp_serial_initialize or
     __kmp_middle_initialize would deadlock; call the do_* routines directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs; worker threads will set theirs to these values.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KMP_MB();
  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
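
/* Illustrative sketch (not part of the runtime): the three stages above are
   reached lazily from user code. In a program such as

       #include <omp.h>
       int main(void) {
         omp_set_num_threads(4); // first runtime entry triggers serial init
       #pragma omp parallel     // first fork triggers middle + parallel init
         { }
       }

   the calls funnel through __kmp_serial_initialize ->
   __kmp_do_middle_initialize -> __kmp_parallel_initialize, each protected by
   __kmp_initz_lock with a double-check so racing first calls are safe. */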
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check. Note that this double check should not be placed before
  // __kmp_parallel_initialize as it will cause a deadlock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings.
  // __kmp_parallel_initialize() above will have initialized regular affinity
  // (and topology) if not already done.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Set the global variable indicating that we're initializing hidden helper
  // team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform-independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait here for the finish of initialization of hidden helper teams
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset doacross dispatch buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
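
/* Illustrative sketch (not part of the runtime): t_pkfn invoked above is the
   compiler-outlined body of the parallel region. For

       #pragma omp parallel
       do_work();

   the compiler emits, roughly,

       void outlined(kmp_int32 *gtid, kmp_int32 *btid, ...shared args...);

   and __kmp_invoke_microtask() calls it with this thread's global and bound
   thread ids plus the t_argc arguments captured in t_argv at fork time. */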
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in a teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // Make thr the CG root
  // Init to thread limit stored when teams masters were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // Init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;

  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* This sets the requested number of threads for the next parallel region
   encountered by this team. Since this should be enclosed in the forkjoin
   critical section, it should avoid race conditions with asymmetrical nested
   parallelism. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
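
/* Illustrative sketch (not part of the runtime): a num_threads clause lowers
   to a call of the matching compiler entry point just before the fork, e.g.

       #pragma omp parallel num_threads(4)

   becomes, approximately,

       __kmpc_push_num_threads(&loc, gtid, 4);
       __kmpc_fork_call(&loc, 0, outlined);

   so the th_set_nproc value set here is consumed once by the next fork. */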
void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
                                 int *num_threads_list) {
  kmp_info_t *thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(list_length > 1);

  if (num_threads_list[0] > 0)
    thr->th.th_set_nproc = num_threads_list[0];
  thr->th.th_set_nested_nth =
      (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
  for (kmp_uint32 i = 0; i < list_length; ++i)
    thr->th.th_set_nested_nth[i] = num_threads_list[i];
  thr->th.th_set_nested_nth_sz = list_length;
}
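
/* Illustrative sketch (not part of the runtime, assuming a compiler that
   implements the OpenMP 6.0 list form of num_threads): a clause such as
   num_threads(4, 2) would reach this routine with list_length == 2 and
   num_threads_list == {4, 2}; entry [0] sets the immediate team size, and the
   whole list seeds th_set_nested_nth for the nested levels below it. */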
void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
                                  const char *msg) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_nt_strict = true;
  thr->th.th_nt_loc = loc;
  // if sev is unset, make it fatal
  if (sev == severity_warning)
    thr->th.th_nt_sev = sev;
  else
    thr->th.th_nt_sev = severity_fatal;
  // if msg is unset, use an appropriate message
  if (msg)
    thr->th.th_nt_msg = msg;
  else
    thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
                        "strict num_threads clause.";
}
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads w/o warning as it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    // no thread_limit clause specified - do not change thread-limit-var ICV
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size from exceeding thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary of the league primary threads.
    // Store the new thread limit; old limit is saved in th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // OpenMP specification requires requested values to be positive,
    // but people can send us any value, so we'd better check.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
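
/* Illustrative sketch (not part of the runtime): for a league such as

       #pragma omp teams num_teams(8) thread_limit(4)

   the compiler emits approximately

       __kmpc_push_num_teams(&loc, gtid, 8, 4);
       __kmpc_fork_teams(&loc, 0, teams_outlined);

   so nteams lands in th_teams_size.nteams here and the per-team thread limit
   is resolved by __kmp_push_thread_limit() above. */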
/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
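
/* Illustrative sketch (not part of the runtime): OpenMP 5.1 allows a range,

       #pragma omp teams num_teams(4 : 16)

   which lowers to __kmpc_push_num_teams_51(&loc, gtid, 4, 16, nthreads); the
   branches above pick a team count inside [lb, ub] subject to
   __kmp_teams_max_nth and any explicit thread_limit. */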
// Set the proc_bind var to use for the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
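
/* Illustrative sketch (not part of the runtime): proc_bind(close) on a
   parallel construct lowers to

       __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);

   immediately before the fork; the value is consumed once when the next team
   is assembled and its threads are placed. */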
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset dispatch buffers */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled &&
      (ompt_state == ompt_state_wait_barrier_teams ||
       ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      sync_kind = ompt_sync_region_barrier_teams;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
                root, set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer par level), and the currently
  // executing thread (to become the primary thread) are available to add to
  // the new team, but are currently contributing to the system load, and must
  // be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set
    // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, we shouldn't wind
    // up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the #active omp threads that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
/* ------------------------------------------------------------------------ */

void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;

  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
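
/* Illustrative sketch (not part of the runtime): this backs the kmp_*
   extension API exposed by Intel-compatible compilers, e.g.

       kmp_set_library_throughput(); // or kmp_set_library(<mode>)

   and the same modes can be selected without code changes via
   KMP_LIBRARY=serial|turnaround|throughput in the environment. */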
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
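
/* Illustrative sketch (not part of the runtime): reachable from user code as

       kmp_set_stacksize_s((size_t)8 * 1024 * 1024); // 8 MiB worker stacks

   or via OMP_STACKSIZE=8m / KMP_STACKSIZE=8m. As the guard above shows, the
   call only takes effect before the first parallel region spawns workers. */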
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common for all team API */
// Returns NULL if not in a teams construct
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
/* ------------------------------------------------------------------------ */

// Affinity format fields: short name, long name, and the printf-style format
// character used when printing the field's value.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> nesting level
  const char *long_name; // from spec e.g., nesting_level
  char field_format; // how to format the field in printf style
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
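
/* Illustrative sketch (not part of the runtime): with the table above, an
   affinity-format-var such as

       OMP_AFFINITY_FORMAT="OMP: host %H pid %P tid %i thread %0.4n bound to %A"

   prints one line per thread. Short names (%n) and long names (%{thread_num})
   are interchangeable, and the 0 / . / <width> modifiers map onto the
   printf-style format built in __kmp_aux_capture_affinity_field() below. */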
// Return number of characters needed to hold field
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit number widths; this also prevents overflowing the
    // format variable.
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to spec, if an implementation does not have info for a field
    // type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
/* Return the number of characters needed to hold the affinity string (not
   including the null byte). The resultant string is printed to buffer, which
   the caller can then handle afterwards. */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put the literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
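
/* Illustrative sketch (not part of the runtime): the user-facing triggers are
   OMP_DISPLAY_AFFINITY=TRUE (print once per team formation) and the OpenMP
   5.0 API:

       #pragma omp parallel
       omp_display_affinity(NULL); // NULL selects affinity-format-var

   omp_capture_affinity() goes through __kmp_aux_capture_affinity() the same
   way, but returns the string instead of printing it. */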
/* ------------------------------------------------------------------------ */
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in microseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
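
/* Illustrative sketch (not part of the runtime): blocktime controls how long
   workers spin before sleeping between parallel regions. Typical usage:

       kmp_set_blocktime(0); // sleep immediately; friendlier to
                             // oversubscribed or co-scheduled applications

   or KMP_BLOCKTIME=0 (or "infinite" to never sleep) in the environment. */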
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL).
  // If (reduce_data != NULL && reduce_func != NULL): the tree-reduction method
  // can be selected by RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by RTL.
  // Finally, it's up to the OpenMP RTL to decide which method to select among
  // those generated by the compiler.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another choice of getting a team size (with 1 dynamic deref) is slower.
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 ||             \
    KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU ||       \
    KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS ||       \
    KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HAIKU || KMP_OS_HURD ||         \
    KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) { // && (team_size <= 8) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
// This function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
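
/* Illustrative sketch (not part of the runtime): the method chosen above is
   per reduction site. For

       #pragma omp parallel for reduction(+ : sum)

   the compiler passes the site's ident_t flags (is an atomic variant
   available?), the packed private copies (reduce_data/reduce_size) and a
   combiner (reduce_func) into __kmpc_reduce{,_nowait}(), which consults
   __kmp_determine_reduction_method() to pick critical, atomic, or tree. */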
// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely. Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it up if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// This function is called via __kmpc_pause_resource. Returns 0 if successful.
// The thread pool is paused (soft), or the runtime is shut down (hard).
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // runtime is not paused, so cannot resume
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused || level == kmp_stop_tool_paused) {
    // requesting hard pause or stop_tool pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
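
/* Illustrative sketch (not part of the runtime): this backs the OpenMP 5.0
   pause API, e.g.

       #include <omp.h>
       omp_pause_resource_all(omp_pause_soft); // quiesce worker threads
       // ... non-OpenMP phase ...
       #pragma omp parallel // resumes transparently
       { }

   omp_pause_hard additionally shuts the runtime down via __kmp_hard_pause(). */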
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
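
/* Illustrative sketch (not part of the runtime): equivalent user triggers are
   OMP_DISPLAY_ENV=TRUE (or VERBOSE) at startup, and at runtime the OpenMP 5.1
   entry point

       omp_display_env(0); // pass 1 for the verbose listing

   which arrives here through the corresponding libomp export. */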
// The team size is changing, so the distributed barrier must be modified.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      // teams construct causes thread_limit to get passed in, and some of
      // those could be inactive; just ignore them
      continue;
    }
    // If the thread is still transitioning to the in_use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in_use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  KMP_MFENCE();

  // Workers should see the transition status 2 and move to 0; but they may
  // need to be woken up first.
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
// Add the threads back to the team
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. We're going to set th_used_in_team to 3 to indicate to
  // the thread that it should transition itself back into the team. Then, if
  // blocktime isn't infinite, the thread could be sleeping, so we send a
  // resume to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    (void)KMP_COMPARE_AND_STORE_ACQ32(
        &(team->t.t_threads[f]->th.th_used_in_team), 0, 3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // The threads should be transitioning to the team; when they are done, they
  // should have set th_used_in_team to 1. This loop forces the primary thread
  // to wait until all of them have done so.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
// Globals and functions for hidden helper tasks
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads in case
  // that when a regular thread pushes a hidden helper task to one hidden
  // helper thread, the thread has not been initialized yet. We wait here to
  // be sure that all hidden helper threads are ready before we pop tasks.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If this is the main thread of hidden helper team, wait for the signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
/* Nesting mode is used to simplify the use of nested parallelism on machines
   with a hierarchical architecture; it reduces the burden on users setting up
   nested parallelism by hand. */
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set the number of threads for the top levels of nesting, based on the
// hardware topology.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--;
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't lose nesting levels
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable guesstimation
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to the same
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
// Empty symbols to export (see exports_so.txt) when a feature is disabled
extern "C" {
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif
} // extern "C"