Cowboy Failing With 'Already Running' Process When Moved From Main App Level To Supervised Worker?

Hi all. Cowboy is killing me. When I follow their documentation and place the Cowboy dispatch-compilation and listener-start code at the application level, it works.

% Compile the routing table handed to Cowboy: the '_' host entry carries a
% single route, "/", served by the reservationhandler module with [] as its
% third (initial-state) element.
Dispatch = cowboy_router:compile([
        {'_', [
            {"/", reservationhandler, []}
        ]}
    ]),
    
    % Start a clear-text HTTP listener registered as pbx_http_listener on
    % port 6969, passing the compiled dispatch table through the protocol
    % options' env map. The match asserts that start-up succeeded.
    {ok, ListenerPid} = cowboy:start_clear(
        pbx_http_listener,
        [{port, 6969}],
        #{env => #{dispatch => Dispatch}}
    ),

However, this feels gross so, naturally, I want to put it within a supervised worker process in case it crashes.

So, I try it.

My app initializes a top level supervisor, which then spawns 3 more sup processes, one of which spawns the cowboy worker.

-module(pbxcontroller_sup).
-behaviour(supervisor).
-export([start_link/0, init/1]).

%% Starts the PBXController main supervisor, registering it locally under
%% this module's name. init/1 receives an empty argument list.
start_link() ->
    RegName = {local, ?MODULE},
    InitArgs = [],
    supervisor:start_link(RegName, ?MODULE, InitArgs).

%% Supervisor callback for the PBXController main supervisor.
%%
%% Returns a one_for_one restart policy (at most 5 restarts within 10
%% seconds) and a single permanent child of type supervisor, started through
%% cowboy_sup:start_link/0.
%%
%% NOTE(review): the child module is called cowboy_sup, which is also the
%% name of the Cowboy application's own top-level supervisor; starting this
%% child fails with {already_started, Pid} while Cowboy is running.
init([]) ->
    logger:info("Starting PBXController Main Supervisor", []),
    CowboySupSpec = #{
        id => cowboy_sup_id,
        start => {cowboy_sup, start_link, []},
        restart => permanent,
        shutdown => 5000,
        type => supervisor,
        modules => [cowboy_sup]
    },
    ChildSpecs = [CowboySupSpec],
    RestartPolicy = #{strategy => one_for_one, intensity => 5, period => 10},
    ChildCount = length(ChildSpecs),
    logger:info("PBXController Main Supervisor initialized with ~p children", [ChildCount]),
    {ok, {RestartPolicy, ChildSpecs}}.

And then cowboy_sup:

-module(cowboy_sup).
-behaviour(supervisor).
-export([start_link/0, init/1]).

%% Starts this supervisor and registers it locally under the module name.
%%
%% NOTE(review): Cowboy ships its own supervisor module named cowboy_sup,
%% registered under the same local name. While the cowboy application is
%% running, this call returns {error, {already_started, Pid}}; giving this
%% module (and its references) a project-specific name avoids the clash.
start_link() ->
    RegName = {local, ?MODULE},
    InitArgs = [],
    supervisor:start_link(RegName, ?MODULE, InitArgs).

%% Supervisor callback: supervises the single cowboy_worker gen_server.
%%
%% Returns a one_for_one restart policy (at most 10 restarts within 60
%% seconds) and one permanent worker child with a 5000 ms shutdown timeout.
init([]) ->
    logger:info("Starting Cowboy Supervisor", []),

    % Restart policy applied to the worker below
    RestartPolicy = #{strategy => one_for_one, intensity => 10, period => 60},

    % The only child: the gen_server that owns the HTTP listener
    WorkerSpec = #{
        id => cowboy_worker,
        start => {cowboy_worker, start_link, []},
        restart => permanent,
        shutdown => 5000,
        type => worker,
        modules => [cowboy_worker]
    },

    logger:info("Cowboy Supervisor initialized", []),
    {ok, {RestartPolicy, [WorkerSpec]}}.

And then the cowboy worker:

-module(cowboy_worker).
-behaviour(gen_server).
-export([start_link/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).

%% Starts the Cowboy worker gen_server, registered locally under this
%% module's name, with an empty init argument list and default options.
start_link() ->
    RegName = {local, ?MODULE},
    InitArgs = [],
    StartOpts = [],
    gen_server:start_link(RegName, ?MODULE, InitArgs, StartOpts).

%% gen_server callback: brings up the Cowboy clear-text HTTP listener
%% pbx_http_listener on port 6969 and keeps its pid and name in the state.
%%
%% Returns {ok, State} where State is a map with the keys listener_pid and
%% listener_name. Crashes (badmatch) if the listener cannot be started, so
%% the supervisor sees the start failure.
init([]) ->
    logger:info("Starting Cowboy Worker", []),

    % Trap exits so that terminate/2 is invoked when the supervisor shuts this
    % worker down; a gen_server that does not trap exits is killed by the
    % shutdown signal without running terminate/2, leaving the listener up.
    process_flag(trap_exit, true),

    % Best-effort removal of a listener left over from a previous incarnation
    % of this worker. Failures are logged instead of being silently dropped.
    try
        cowboy:stop_listener(pbx_http_listener)
    catch
        Class:Reason ->
            logger:warning(
                "Could not stop stale listener ~p: ~p:~p",
                [pbx_http_listener, Class, Reason]
            )
    end,

    % Define the dispatch rules
    Dispatch = cowboy_router:compile([
        {'_', [
            {"/", reservationhandler, []}
        ]}
    ]),

    % Start Cowboy; the match asserts success (let it crash otherwise)
    {ok, ListenerPid} = cowboy:start_clear(
        pbx_http_listener,
        [{port, 6969}],
        #{env => #{dispatch => Dispatch}}
    ),

    logger:info("Cowboy HTTP server started on port 6969", []),
    {ok, #{listener_pid => ListenerPid, listener_name => pbx_http_listener}}.

%% gen_server callback: stops the Cowboy listener recorded in the state.
%%
%% Clean-up is best effort: a failure to stop the listener is logged as a
%% warning and never prevents termination. Always returns ok.
%%
%% NOTE: gen_server only calls terminate/2 on a supervisor-ordered shutdown
%% when the process traps exits.
terminate(_Reason, #{listener_name := ListenerName}) ->
    logger:info("Stopping Cowboy Worker", []),
    try
        cowboy:stop_listener(ListenerName)
    catch
        Class:Error ->
            logger:warning(
                "Failed to stop listener ~p: ~p:~p",
                [ListenerName, Class, Error]
            )
    end,
    ok;
terminate(_Reason, _State) ->
    % State carries no listener name, so there is nothing to stop.
    logger:info("Stopping Cowboy Worker (no listener)", []),
    ok.

% Minimal gen_server callbacks: the worker has no request protocol of its
% own, so every callback leaves the state untouched.

% Any synchronous request is answered with ok.
handle_call(_Req, _Caller, St) ->
    {reply, ok, St}.

% Casts are ignored.
handle_cast(_Cast, St) ->
    {noreply, St}.

% Raw messages are ignored.
handle_info(_Message, St) ->
    {noreply, St}.

% No state migration on code upgrade.
code_change(_FromVsn, St, _Extra) ->
    {ok, St}.

Couldn’t get much simpler than that! This compiles fine but every single time I attempt to shell in I get:

Failed to boot pbxcontroller for reason {{shutdown,
                                                          {failed_to_start_child,
                                                           cowboy_sup_id,
                                                           {already_started,
                                                            <0.328.0>}}},
                                                         {pbxcontroller_app,
                                                          start,
                                                          [normal,[]]}}

And this is AFTER I quit the last shell properly, kill all beam and erl processes, rm -rf the _build and rebar.lock files, clean, and recompile the project. That 0.328.0 process is still there, it’s the same number.

When I try to shell in again with a new node name, e.g. rebar3 shell --name "fresh_$(date +%s)@localhost", the process ID is still the same across nodes.

So, I run is_process_alive(pid(0,328,0)) from within the same shell that just errored out a second ago and it returns? False.

If it wasn’t already obvious, I’m new to Erlang and have quite literally no clue what’s going on here. Makes me want to napalm the whole project and start fresh, any help would be greatly appreciated!

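Fixed. Don’t name your supervisor cowboy_sup… Cowboy already creates one with the exact same name. Cowboy’s own supervisor is registered locally as cowboy_sup when the cowboy application starts, so a second start_link under that name returns {already_started, Pid}. The pid looks identical on every run only because the boot sequence, and therefore the pid numbering, is the same each time.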

1 Like

Did you see a case where the listener is not restarting? ranch is already supervising the process in ranch_sup, so I would expect it to handle the restart for us.