Hi,
This is a cross-post from Elixir forums, since both forums have the experts.
Let me first preface this by saying I don’t think the problem lies in Erlang/Elixir.
Here’s our setup:
Production: Deployed on AWS as a cluster of k8s managed Docker containers, each running a single Erlang VM.
Development: Single Docker container (same specification as production) or simply running natively on a Mac.
The application periodically makes SSL client REST calls to a couple of servers.
Calls are made via HTTPoison, which uses hackney, with certificates managed by certifi.
We were running OTP 22 and Elixir 1.11.x; we recently decided to upgrade to OTP 25 and Elixir 1.14. This also involved upgrading most of the dependencies.
Here is the problem: SSL calls to the services all time out during the SSL handshake. Here’s what is odd:
- I can make SSL calls to new websites
- Everything works in the development environment
- DNS resolution and TCP connectivity both work
I simplified things a little with this snippet:
# Minimal reproduction: open a plain TCP connection to the server, then
# upgrade that socket to TLS and print the connection statistics.
opts = [
  {:log_level, :debug},
  {:verify, :verify_peer},
  # Accept any hostname: the connection is made by IP address, so there is
  # no name to match the certificate against.
  {:customize_hostname_check, [match_fun: fn _ip, _x -> true end]},
  {:cacerts, :certifi.cacerts()}
]

# NOTE(review): no :server_name_indication is given, so no SNI extension is
# sent in the ClientHello. Servers that host several names on one IP may need
# it — TODO add {:server_name_indication, <hostname charlist>} once the real
# hostname is known.

# The socket must be passive before :ssl.connect/3 upgrades it. With the
# default (active) mode the server's handshake reply can be delivered to this
# process's mailbox instead of the TLS state machine, which shows up as a
# handshake timeout.
tcp_opts = [active: false]

with {:ok, port} <-
       :gen_tcp.connect(%{addr: {104, 18, 128, 69}, port: 443, family: :inet}, tcp_opts, 5000) do
  IO.puts("Connected on 443")
  :inet.peername(port) |> IO.inspect(label: :peer_info)

  case :ssl.connect(port, opts, 5000) do
    {:ok, ssl_port} ->
      IO.inspect(:ssl.getstat(ssl_port), label: :success)
      # Close through :ssl so the TLS session is shut down properly; this
      # also closes the underlying TCP socket.
      :ssl.close(ssl_port)

    error ->
      # The handshake failed: release the TCP socket instead of leaking it.
      :gen_tcp.close(port)
      IO.inspect(error, label: :error)
  end
else
  error -> IO.inspect(error, label: :error)
end
The :gen_tcp.connect works; the :ssl.connect fails. I have tried a number of option combinations.
I enabled ssl debugging and got this:
>>> TLS 1.3 Handshake, ClientHello
[{client_version,{3,3}},
{random,
<<134,152,230,87,40,72,129,119,150,175,252,27,246,202,8,211,103,12,48,
226,219,167,183,191,234,12,174,147,127,25,155,53>>},
{session_id,<<>>},
{cookie,undefined},
{cipher_suites,
["TLS_EMPTY_RENEGOTIATION_INFO_SCSV","TLS_AES_256_GCM_SHA384",
"TLS_AES_128_GCM_SHA256","TLS_CHACHA20_POLY1305_SHA256",
"TLS_AES_128_CCM_SHA256","TLS_AES_128_CCM_8_SHA256",
"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_ECDSA_WITH_AES_256_CCM","TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8",
"TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",
"TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
"TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256",
"TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256",
"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
"TLS_ECDHE_ECDSA_WITH_AES_128_CCM","TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8",
"TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384",
"TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384",
"TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256",
"TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256",
"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
"TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256",
"TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256",
"TLS_DHE_RSA_WITH_AES_256_GCM_SHA384",
"TLS_DHE_DSS_WITH_AES_256_GCM_SHA384",
"TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",
"TLS_DHE_DSS_WITH_AES_256_CBC_SHA256",
"TLS_DHE_RSA_WITH_AES_128_GCM_SHA256",
"TLS_DHE_DSS_WITH_AES_128_GCM_SHA256",
"TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256",
"TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",
"TLS_DHE_DSS_WITH_AES_128_CBC_SHA256",
"TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
"TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
"TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA",
"TLS_ECDH_RSA_WITH_AES_256_CBC_SHA",
"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
"TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA",
"TLS_ECDH_RSA_WITH_AES_128_CBC_SHA","TLS_DHE_RSA_WITH_AES_256_CBC_SHA",
"TLS_DHE_DSS_WITH_AES_256_CBC_SHA","TLS_DHE_RSA_WITH_AES_128_CBC_SHA",
"TLS_DHE_DSS_WITH_AES_128_CBC_SHA"]},
{compression_methods,[0]},
{extensions,
#{alpn => undefined,
client_hello_versions => {client_hello_versions,[{3,4},{3,3}]},
ec_point_formats => {ec_point_formats,[0]},
elliptic_curves => {supported_groups,[x25519,x448,secp256r1,secp384r1]},
key_share =>
{key_share_client_hello,
[{key_share_entry,x25519,
<<43,88,142,125,18,179,171,46,174,221,187,47,152,87,31,
192,187,126,240,39,122,23,222,102,173,223,129,197,121,
230,203,75>>}]},
max_frag_enum => undefined,next_protocol_negotiation => undefined,
renegotiation_info => {renegotiation_info,undefined},
signature_algs =>
{signature_algorithms,
[eddsa_ed25519,eddsa_ed448,ecdsa_secp521r1_sha512,
ecdsa_secp384r1_sha384,ecdsa_secp256r1_sha256,
rsa_pss_pss_sha512,rsa_pss_pss_sha384,rsa_pss_pss_sha256,
rsa_pss_rsae_sha512,rsa_pss_rsae_sha384,rsa_pss_rsae_sha256,
{sha512,ecdsa},
{sha512,rsa},
{sha384,ecdsa},
{sha384,rsa},
{sha256,ecdsa},
{sha256,rsa},
{sha224,ecdsa},
{sha224,rsa},
{sha,ecdsa},
{sha,rsa},
{sha,dsa}]},
signature_algs_cert => undefined,sni => undefined,srp => undefined}}]
writing (269 bytes) TLS 1.2 Record Protocol, handshake
0000 - 16 03 03 01 08 01 00 01 04 03 03 86 98 e6 57 28 ..............W(
0010 - 48 81 77 96 af fc 1b f6 ca 08 d3 67 0c 30 e2 db H.w........g.0..
0020 - a7 b7 bf ea 0c ae 93 7f 19 9b 35 00 00 62 00 ff ..........5..b..
0030 - 13 02 13 01 13 03 13 04 13 05 c0 2c c0 30 c0 ad ...........,.0..
0040 - c0 af c0 24 c0 28 cc a9 cc a8 c0 2b c0 2f c0 ac ...$.(.....+./..
0050 - c0 ae c0 2e c0 32 c0 26 c0 2a c0 2d c0 31 c0 23 .....2.&.*.-.1.#
0060 - c0 27 c0 25 c0 29 00 9f 00 a3 00 6b 00 6a 00 9e .'.%.).....k.j..
0070 - 00 a2 cc aa 00 67 00 40 c0 0a c0 14 c0 05 c0 0f .....g.@........
0080 - c0 09 c0 13 c0 04 c0 0e 00 39 00 38 00 33 00 32 .........9.8.3.2
0090 - 01 00 00 79 00 0d 00 2e 00 2c 08 07 08 08 06 03 ...y.....,......
00a0 - 05 03 04 03 08 0b 08 0a 08 09 08 06 08 05 08 04 ................
00b0 - 06 03 06 01 05 03 05 01 04 03 04 01 03 03 03 01 ................
00c0 - 02 03 02 01 02 02 00 33 00 26 00 24 00 1d 00 20 .......3.&.$...
00d0 - 2b 58 8e 7d 12 b3 ab 2e ae dd bb 2f 98 57 1f c0 +X.}......./.W..
00e0 - bb 7e f0 27 7a 17 de 66 ad df 81 c5 79 e6 cb 4b .~.'z..f....y..K
00f0 - 00 0a 00 0a 00 08 00 1d 00 1e 00 17 00 18 00 0b ................
0100 - 00 02 01 00 00 2b 00 05 04 03 04 03 03 .....+.......
error: {:error, :timeout}
We simply do not get any response to the first TLS handshake message (the ClientHello).
I’m thinking that there is some k8s or other caching going on, and I’ll ask our k8s expert tomorrow.
I was wondering if anyone else has hit this issue?
Thanks