Having a hard time interpreting instrument:allocations().

Hello. I’ve encountered a memory leak in a recent product feature of ours that involved a NIF dependency upgrade. I’m not sure which specific part of the application is exhausting all available memory on the server, so I started investigating current memory usage. observer_cli metrics show that ~80% of the memory allocated by the Erlang node falls under the Process memory type. The process list sorted by memory shows no signs of abnormal per-process memory use or an excessive process count, and message queues also look fine. So I tried to dig into allocation issues with instrument:allocations() and the +Muatags true BEAM flag enabled. Is there a rule of thumb for reading the allocation output? What exactly should I be watching for? I’ve included obfuscated allocation stats below. Sorry for such a noob question lol, but I’ve never used this tool before and couldn’t find any meaningful guides on using this module.

Erlang/OTP 22 [erts-10.5.1]

Eshell V10.5.1  (abort with ^G)
(app@servername)1> instrument:allocations().
{ok,{128,917579632,
     #{asn1rt_nif =>
           #{drv_binary => {248,116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       crypto => #{drv_binary => {0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       sniffer_nif =>
           #{binary => {0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_cond => {2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_mutex => {1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_tid => {2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             nif_internal => {14,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0}},
       jiffy =>
           #{binary => {0,91,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             drv_binary => {0,121,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       prim_buffer =>
           #{binary => {0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             drv_binary => {0,0,0,16,11,0,0,0,0,0,0,0,0,0,0,0,0,0},
             nif_internal => {0,7,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       prim_file =>
           #{binary => {0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             monitor_extended => {0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             nif_trap_export_entry => {0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             process_specific_data => {7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       spawn_forker =>
           #{driver_select_data_state =>
                 {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       system =>
           #{binary => {0,143607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             scheduler_sleep_info => {0,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0},
             misc_aux_work_q => {0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0},
             poll_fds => {0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0},
             sl_thr_q_element => {95,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             old_heap =>
                 {0,0,0,1,1187,10803,36751,21196,110481,7802,52916,19145,
                  12470,0,0,1,0,3},
             heap_frag =>
                 {6092,326078,103308,4590,0,66157,0,0,0,0,0,0,0,0,0,0,0,0},
             monitor => {0,404,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             monitor_extended => {0,27,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             db_segment => {0,0,0,0,67,2,11,1488,0,0,0,0,0,0,0,0,0,0},
             fun_entry => {10928,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             re_stack => {0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             dcache => {0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0},
             monitor_link_dist => {4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             beam_register => {0,0,0,0,0,0,180,0,0,0,0,0,0,0,0,0,0,0},
             re_heap => {0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             pre_alloc_data => {0,0,0,0,0,0,4,3,2,3,3,0,0,1,0,0,0,0},
             heap =>
                 {0,0,85,8306,1192,37633,32915,45024,155614,20414,8042,14,5,0,
                  1,2,0,4},
             fun_tab => {0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0},
             sys_blocking => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             pollset => {4,0,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0},
             drv_tab => {1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             code => {0,0,84,131,173,161,202,145,98,70,24,4,6,1,1,0,0,0},
             atom_tab => {0,0,0,0,0,0,60,0,0,0,0,1,0,0,0,0,0,0},
             ll_ptimer => {172135,10621,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             atom_text => {0,0,0,0,0,0,0,0,41,0,0,0,0,0,0,0,0,0},
             msg_ref => {404104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             re_subject => {4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             port_tab => {0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0},
             db_term =>
                 {80618,2678486,214467,1021,189,2,0,0,0,0,0,0,0,0,0,0,0,0},
             magic_ref_table_buckets => {41,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             erl_bif_atomics => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             microstate_accounting => {1,93,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             port => {0,0,60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             proc => {0,0,303880,489,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             module_refs => {0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0},
             node_tab => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             nif_internal =>
                 {2805,62,28483,2356319,6651722,50224,0,2764,0,0,0,0,0,0,0,0,
                  0,0},
             cpu_groups_map => {3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             ll_migration_paths => {0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0},
             hipe_long_lived => {0,0,1,0,0,0,92,0,0,0,0,0,0,0,0,0,0,0},
             ethread_standard => {3,0,0,0,607,0,0,0,0,0,1,0,0,0,0,0,0,0},
             run_queues => {0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0},
             timer_wheel => {0,0,0,0,0,0,0,0,0,0,0,40,0,0,0,0,0,0},
             taint_list => {7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             atom_entry => {59688,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             crash_dump => {0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0},
             message => {0,150719,42,152,8,1,0,0,0,0,0,0,0,0,0,0,0,0},
             bif_timer => {0,93,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             dist_entry => {0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             node_entry => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             reg_proc => {321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_tsd => {2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             thr_prgr_internal_data => {0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0},
             module_entry => {0,3295,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             magic_ref_table => {0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0},
             link => {254580,29338,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             db_stack => {0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             port_specific_data => {4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_mutex => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             dist_tab => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             bits_buf => {90,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             db_tab => {0,70,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             persisten_term => {0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_rwlock => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             arg_reg => {93200,2196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             tracer_nif => {2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             fd_status => {0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0},
             scheduler_data => {0,2,0,0,0,0,0,1,1,0,0,0,0,2,0,0,0,0},
             db_heir_data => {0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             cpu_data => {0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             drv_binary =>
                 {604652,115689,45062,26978,193,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_lock => {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             export_entry => {0,25635,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             export_tab => {0,0,0,0,0,3,78,0,0,0,3,0,0,0,0,0,0,0},
             port_data_lock => {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             magic_indirection => {79,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             db_tabs => {0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0},
             driver => {0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             proc_tab => {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
             drv_internal => {0,0,56,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0},
             reg_tab => {0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0},
             module_tab => {0,0,3,0,0,3,6,0,0,0,0,0,0,0,0,0,0,0},
             internal_async_data => {0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0},
             run_queue_balancing => {0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0},
             timer_service => {0,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             db_match_pseudo_proc => {0,0,0,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             thr_prgr_data => {54,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_select_data_state => {2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             proc_dict => {251570,49723,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             ethread_long_lived => {0,0,0,0,267,0,0,0,0,0,0,0,0,0,0,0,0,0},
             process_specific_data => {39,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       tcp_inet =>
           #{atom_entry => {4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_select_data_state =>
                 {57,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_tid => {40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             driver_tsd => {40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             drv_binary => {48,9,45,279,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             drv_internal => {10,40,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             heap_frag => {2,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             link => {45,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             link_extended => {0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             message => {0,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             monitor => {0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             msg_ref => {28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             node_entry => {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
             port => {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}},
       udp_inet =>
           #{driver_event_state => {0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0}}}}}

A simple thing to look out for is a large number of allocations under a single type. To me, nif_internal under system sticks out a lot, as it’s rather unusual for NIF-related allocations to not be attributed to a specific NIF.
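
Each tuple in the output is a block-size histogram: the first bucket counts blocks of up to HistogramStart bytes (128 in your paste, the first element of the result tuple) and each following bucket doubles the bound. A quick way to rank the output is to weigh each bucket by its upper bound; here is a minimal sketch assuming the {ok, {HistStart, _Unscanned, Allocs}} shape shown above (alloc_rank and top/0 are names I made up):

    -module(alloc_rank).
    -export([top/0]).

    %% Rank {Origin, Type} pairs by a rough upper bound on allocated bytes.
    %% Requires a node started with "+Muatags true".
    top() ->
        {ok, {HistStart, _Unscanned, Allocs}} = instrument:allocations(),
        Ranked = [{upper_bound(HistStart, Hist), Origin, Type}
                  || {Origin, Types} <- maps:to_list(Allocs),
                     {Type, Hist} <- maps:to_list(Types)],
        lists:reverse(lists:sort(Ranked)).

    %% Bucket I counts blocks of at most HistStart bsl (I - 1) bytes, so
    %% Count * Bound gives an upper bound on each bucket's contribution.
    upper_bound(HistStart, Hist) ->
        Counts = tuple_to_list(Hist),
        Bounds = [HistStart bsl (I - 1) || I <- lists:seq(1, length(Counts))],
        lists:sum([C * B || {C, B} <- lists:zip(Counts, Bounds)]).

Weighed that way, system -> nif_internal dominates your paste: the ~2.4M blocks in the ≤1024-byte bucket and ~6.7M blocks in the ≤2048-byte bucket bound to several gigabytes on their own.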

These unattributed allocations happen when the runtime system is unable to credit them to the NIF in question, for example during the early stages of NIF loading (which is unlikely given the amount), or when the allocations are made on a non-scheduler thread.
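
If you want a sharper view of how big those blocks actually are, instrument:allocations/1 takes options; the option names below are from the OTP 22 instrument docs, and the values are only examples:

    %% Finer buckets (first bound 64 bytes) and a wider histogram, so big
    %% blocks land in their own bucket instead of piling up at the end.
    instrument:allocations(#{histogram_start => 64,
                             histogram_width => 20}).

The same docs also list allocator_types and scheduler_ids options, in case you want to restrict the scan to particular alloc_util allocators or scheduler instances.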

Does your NIF do any operations on its own threads?

Yeah, you’re right, our NIF library (it’s named sniffer_nif in the allocations output above) spawns its own threads under the hood. These threads handle incoming traffic and allocate memory with enif_alloc(size_t) for the packet reassembly routines.

So the histogram values for system -> nif_internal don’t mean that those allocations took place inside BIFs (which I had assumed, since they sit under the system group), but rather that a NIF made untagged allocations that could not be attributed elsewhere. Is that correct?

Yes. In hindsight it should probably have been further categorized, e.g. per thread type for those known to the emulator, and “other” for those not. That would’ve made this stick out a bit more. I’ll make a PR when I find the time :slightly_smiling_face:

I see. Gotta dig into the latest patch of the NIF library for excessive/unmanaged allocations then. Thanks a lot!