Being curious about what the current numbers could be, I ran a benchmark using erlperf:
% PATH=$ERL_TOP/bin:$PATH erlperf 'call_bm:apply(call_bm).' 'call_bm:external(call_bm).' 'call_bm:local(call_bm).'
Code || QPS Time Rel
call_bm:external(call_bm). 1 34762 Ki 28 ns 100%
call_bm:local(call_bm). 1 34710 Ki 28 ns 100%
call_bm:apply(call_bm). 1 7733 Ki 129 ns 22%
It seems that nowadays, with the JIT, an external call is as fast as a local call, while an apply (a call such as `M:f()`, where the module is a variable only known at runtime) is between 4 and 5 times slower.
This benchmark was run on my M1 MacBook Pro. I got similar relative differences on my Intel iMac.
Here is the module I used for benchmarking:
-module(call_bm).
-export([apply/1,external/1,local/1,f/0]).
apply(M) ->
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f(),
M:f().
external(_) ->
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f(),
?MODULE:f().
%% Benchmark body for local (unqualified) calls.
%%
%% Performs twenty consecutive local `f()' calls and returns the result of
%% the last one. The argument is ignored; it is accepted only so that this
%% function has the same arity as apply/1 and external/1.
local(_Module) ->
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f(),
    f().
%% Call target used by apply/1, external/1 and local/1.
%% Does no work and simply returns the atom `ok'.
f() -> ok.