Why is the refc_binary reference invalid

Hi all

I create a refc_binary Bin1 and then create a sub_binary Bin2 by appending some bits to Bin1.
I thought Bin2’s erl_sub_bin->orig would refer to Bin1’s address, but I was wrong.
Did I do something wrong?

erl

%% Builds a binary via term_to_binary/1, appends 10 extra bits to it, and
%% prints the caller's pid, the internal term type of both values, and the
%% answer of the binary_addr:is_diff_binary/2 NIF for the pair.
test_4() ->
    Parent = erlang:term_to_binary(<<0:(8 * 65)>>),
    Child = <<Parent/binary, 0:10>>,
    Pid = self(),
    ParentType = erts_internal:term_type(Parent),
    ChildType = erts_internal:term_type(Child),
    %% The NIF writes its own diagnostics to stderr before we print below.
    Verdict = binary_addr:is_diff_binary(Parent, Child),
    io:format(
        "Pid:~w~nBin1:~w~nBin2:~w~nIsDiffBinary:~w~n",
        [Pid, ParentType, ChildType, Verdict]
    ).

nif

/*
 * Hand-written copy of the on-heap header of a reference-counted binary
 * term, used by the NIF code in this post to reinterpret an untagged term
 * pointer. All fields are fixed at 64 bits / pointer size.
 * NOTE(review): presumably this has to match the layout used by the running
 * emulator (the post was tested on 64-bit erts-13.0.4) -- verify against the
 * ERTS sources before reusing it elsewhere.
 */
typedef struct proc_bin {
    u_int64_t thing_word;		/* Subtag REFC_BINARY_SUBTAG. */
    u_int64_t size;			/* Binary size in bytes. */
    struct erl_off_heap_header *next;
    void *val;		/* Pointer to Binary structure. */
    u_int8_t *bytes;		/* Pointer to the actual data bytes. */
    u_int64_t flags;			/* Flag word. */
} ProcBin;

/*
 * Hand-written copy of the on-heap header of a sub-binary term, used by the
 * NIF code in this post to read the `orig` word of a sub-binary.
 * NOTE(review): presumably this has to match the layout used by the running
 * emulator (the post was tested on 64-bit erts-13.0.4) -- verify against the
 * ERTS sources before reusing it elsewhere.
 */
typedef struct erl_sub_bin
{
    u_int64_t thing_word; /* Subtag SUB_BINARY_SUBTAG. */
    u_int64_t size;       /* Binary size in bytes. */
    u_int64_t offs;       /* Offset into original binary. */
    u_int8_t bitsize;     /* presumably the number of trailing bits beyond `size` bytes -- TODO confirm */
    u_int8_t bitoffs;     /* presumably the bit offset into the original binary -- TODO confirm */
    u_int8_t is_writable; /* The underlying binary is writable */
    u_int64_t orig;       /* Original binary (REFC or HEAP binary); stored as a tagged term word, the NIFs subtract 2 before dereferencing. */
} ErlSubBin;

/*
 * Debug NIF: reinterprets argv[0] as a ProcBin and argv[1] as an ErlSubBin,
 * dumps the raw term words, the thing words and the data pointers to stderr,
 * and returns the atom 'true' when the sub-binary's `orig` word is identical
 * to the term word of argv[0], 'false' otherwise.
 *
 * NOTE(review): despite the name, 'true' is returned when the two words are
 * EQUAL; callers should read the result accordingly.
 * No type checking is done on the arguments and argc is not inspected.
 */
static ERL_NIF_TERM is_diff_binary(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
    /* Subtracted from a term word to get the header address; assumed to be
     * the pointer tag of these terms -- TODO confirm against ERTS. */
    const u_int64_t tag_bits = 2;

    u_int64_t parent_term = argv[0];
    u_int64_t child_term = argv[1];

    ErlSubBin *child = (ErlSubBin *) (child_term - tag_bits);
    ProcBin *parent = (ProcBin *) (parent_term - tag_bits);
    /* Header that the sub-binary's `orig` word points at. */
    ProcBin *child_parent = (ProcBin *) (child->orig - tag_bits);

    const char *verdict;

    enif_fprintf(stderr, "refc_binary:%lu sub_binary:%lu\n", parent_term, child_term);
    enif_fprintf(stderr, "pRefcBin:%lu pSubBin:%lu pRefcBin2:%lu\n",
                 parent->thing_word, child->thing_word, child_parent->thing_word);
    enif_fprintf(stderr, "pRefcBin:%p pRefcBin2:%p\n", parent->bytes, child_parent->bytes);

    if (parent_term == child->orig) {
        verdict = "true";
    } else {
        verdict = "false";
    }
    return enif_make_atom(env, verdict);
}

output

Erlang/OTP 25 [erts-13.0.4] [source] [64-bit] [smp:4:4] [ds:4:4:10] [async-threads:1] [jit:ns]

Eshell V13.0.4  (abort with ^G)
1> binary
binary         binary_addr    binary_test    
1> binary_test:test_4().
refc_binary:140374128562314 sub_binary:140374128562410
pRefcBin:352 pSubBin:232 pRefcBin2:352
pRefcBin:0x0000563942050a28 pRefcBin2:0x0000563942050ff0
Pid:<0.137.0>
Bin1:refc_binary
Bin2:sub_binary
IsDiffBinary:false
ok
1 Like

There are two use cases for “SubBinary” in erts. Firstly, it is used to create a pointer into another binary, which is the use case I think you were expecting. However, it is also used to represent bitstrings that are not binaries (that is, bitstrings whose size is not a multiple of 8 bits). This is the scenario that your code example shows.

To create a sub-binary that is extracted from another binary, do it like this:

1> Bin = erlang:term_to_binary(<<0:(8 * 65)>>),
<<SubBin:65/bytes, _/binary>> = Bin,
io:format("Pid:~w~nBin:~w~nSubBin:~w~n", [
    self(), erts_internal:term_type(Bin),erts_internal:term_type(SubBin)
]).
Pid:<0.89.0>
Bin:refc_binary
SubBin:sub_binary
ok
2>

This should make your binary_addr test produce the result you expect.

3 Likes

i got it

%% Grows an empty binary by 65 bytes, appends one more bit to the result, and
%% prints what the binary_addr:is_same_binary/2 NIF reports for the two
%% resulting terms.
test_5() ->
    Empty = <<>>,
    Grown = <<Empty/binary, 1:(8*65)>>,
    GrownPlusBit = <<Grown/binary, 1:1>>,
    Same = binary_addr:is_same_binary(Grown, GrownPlusBit),
    io:format("is_same_binary:~w ~n", [Same]).
/*
 * Debug NIF: reinterprets both arguments as ErlSubBin headers and returns
 * the atom 'true' when their `orig` words are identical, 'false' otherwise.
 * No type checking is done on the arguments and argc is not inspected.
 */
static ERL_NIF_TERM is_same_binary(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
    /* Subtracted from a term word to get the header address; assumed to be
     * the pointer tag of these terms -- TODO confirm against ERTS. */
    const u_int64_t tag_bits = 2;

    u_int64_t first_term = argv[0];
    u_int64_t second_term = argv[1];
    ErlSubBin *first = (ErlSubBin *) (first_term - tag_bits);
    ErlSubBin *second = (ErlSubBin *) (second_term - tag_bits);

    if (first->orig == second->orig) {
        return enif_make_atom(env, "true");
    }
    return enif_make_atom(env, "false");
}

If I have a large binary and append new data to it later, can I avoid the cost of the copy made on the first append, as in the example above?

1 Like

Is there a way to set is_writable to 1 when the sub_binary is first created?

1 Like

You should probably read “Erlang -- Constructing and Matching Binaries”; it contains all the information about how to work with binaries in the most efficient manner.

In general: if you are creating a binary in a tight loop, the compiler can emit instructions that allow you to append to the binary without copying. However, I normally find that it is better to build a list of binaries and then either use erlang:iolist_to_binary/1 to flatten the list into a binary at the end or just send the iolist to gen_tcp:send/2 directly.

1 Like

thank you so much

1 Like