Skip to content

Commit ed4d53e

Browse files
committed
CDRIVER-1203 improve server-selection errors
Include each server's ismaster error in the server-selection error from a pooled client.
1 parent 723d5b6 commit ed4d53e

File tree

5 files changed

+285
-109
lines changed

5 files changed

+285
-109
lines changed

src/mongoc/mongoc-topology-scanner-private.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ typedef struct mongoc_topology_scanner
7676
mongoc_async_cmd_setup_t setup;
7777
mongoc_stream_initiator_t initiator;
7878
void *initiator_context;
79+
bson_error_t error;
7980

8081
#ifdef MONGOC_ENABLE_SSL
8182
mongoc_ssl_opt_t *ssl_opts;
@@ -122,8 +123,8 @@ mongoc_topology_scanner_work (mongoc_topology_scanner_t *ts,
122123
int32_t timeout_msec);
123124

124125
void
125-
mongoc_topology_scanner_sum_errors (mongoc_topology_scanner_t *ts,
126-
bson_error_t *error);
126+
mongoc_topology_scanner_get_error (mongoc_topology_scanner_t *ts,
127+
bson_error_t *error);
127128

128129
void
129130
mongoc_topology_scanner_reset (mongoc_topology_scanner_t *ts);

src/mongoc/mongoc-topology-scanner.c

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616

1717
#include <bson.h>
18+
#include <bson-string.h>
1819

1920
#include "mongoc-error.h"
2021
#include "mongoc-trace.h"
@@ -535,6 +536,8 @@ mongoc_topology_scanner_start (mongoc_topology_scanner_t *ts,
535536
return;
536537
}
537538

539+
memset (&ts->error, 0, sizeof (bson_error_t));
540+
538541
if (obey_cooldown) {
539542
/* when current cooldown period began */
540543
cooldown = bson_get_monotonic_time ()
@@ -560,6 +563,45 @@ mongoc_topology_scanner_start (mongoc_topology_scanner_t *ts,
560563
}
561564
}
562565

566+
/*
567+
*--------------------------------------------------------------------------
568+
*
569+
* mongoc_topology_scanner_finish --
570+
*
571+
* Summarizes all scanner node errors into one error message.
572+
*
573+
*--------------------------------------------------------------------------
574+
*/
575+
576+
static void
577+
mongoc_topology_scanner_finish (mongoc_topology_scanner_t *ts)
578+
{
579+
mongoc_topology_scanner_node_t *node, *tmp;
580+
bson_error_t *error = &ts->error;
581+
bson_string_t *msg;
582+
583+
BSON_ASSERT (!error->code); /* cleared by scanner_start */
584+
585+
msg = bson_string_new (NULL);
586+
587+
DL_FOREACH_SAFE (ts->nodes, node, tmp) {
588+
if (node->last_error.code) {
589+
if (msg->len) {
590+
bson_string_append_c (msg, ' ');
591+
}
592+
593+
bson_string_append_printf (msg, "[%s]", node->last_error.message);
594+
595+
/* last error domain and code win */
596+
error->domain = node->last_error.domain;
597+
error->code = node->last_error.code;
598+
}
599+
}
600+
601+
bson_strncpy ((char *) &error->message, msg->str, sizeof (error->message));
602+
bson_string_free (msg, true);
603+
}
604+
563605
/*
564606
*--------------------------------------------------------------------------
565607
*
@@ -585,6 +627,7 @@ mongoc_topology_scanner_work (mongoc_topology_scanner_t *ts,
585627

586628
if (! r) {
587629
ts->in_progress = false;
630+
mongoc_topology_scanner_finish (ts);
588631
}
589632

590633
return r;
@@ -593,40 +636,21 @@ mongoc_topology_scanner_work (mongoc_topology_scanner_t *ts,
593636
/*
594637
*--------------------------------------------------------------------------
595638
*
596-
* mongoc_topology_scanner_sum_errors --
639+
* mongoc_topology_scanner_get_error --
597640
*
598-
* Summarizes all scanner node errors into one error message
641+
* Copy the scanner's current error, which may be no-error (code 0).
599642
*
600643
*--------------------------------------------------------------------------
601644
*/
602645

603646
void
604-
mongoc_topology_scanner_sum_errors (mongoc_topology_scanner_t *ts,
605-
bson_error_t *error)
647+
mongoc_topology_scanner_get_error (mongoc_topology_scanner_t *ts,
648+
bson_error_t *error)
606649
{
607-
mongoc_topology_scanner_node_t *node, *tmp;
608-
609-
DL_FOREACH_SAFE (ts->nodes, node, tmp) {
610-
if (node->last_error.code) {
611-
char *msg = NULL;
612-
613-
if (error->code) {
614-
msg = bson_strdup(error->message);
615-
}
650+
BSON_ASSERT (ts);
651+
BSON_ASSERT (error);
616652

617-
bson_set_error(error,
618-
MONGOC_ERROR_SERVER_SELECTION,
619-
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
620-
"%s[%s] ",
621-
msg ? msg : "", node->last_error.message);
622-
if (msg) {
623-
bson_free (msg);
624-
}
625-
}
626-
}
627-
if (error->code) {
628-
error->message[strlen(error->message)-1] = '\0';
629-
}
653+
memcpy (error, &ts->error, sizeof (bson_error_t));
630654
}
631655

632656
/*

src/mongoc/mongoc-topology.c

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
#include "mongoc-error.h"
1818
#include "mongoc-topology-private.h"
19-
#include "mongoc-uri-private.h"
2019
#include "mongoc-util-private.h"
2120

2221
#include "utlist.h"
@@ -326,22 +325,46 @@ _mongoc_topology_run_scanner (mongoc_topology_t *topology,
326325
*--------------------------------------------------------------------------
327326
*/
328327
static void
329-
_mongoc_topology_do_blocking_scan (mongoc_topology_t *topology, bson_error_t *error) {
330-
mongoc_topology_scanner_start (topology->scanner,
331-
topology->connect_timeout_msec,
328+
_mongoc_topology_do_blocking_scan (mongoc_topology_t *topology,
329+
bson_error_t *error)
330+
{
331+
mongoc_topology_scanner_t *scanner;
332+
333+
scanner = topology->scanner;
334+
mongoc_topology_scanner_start (scanner,
335+
(int32_t) topology->connect_timeout_msec,
332336
true);
333337

334338
while (_mongoc_topology_run_scanner (topology,
335339
topology->connect_timeout_msec)) {}
336340

337-
/* Aggregate all scanner errors, if any */
338-
mongoc_topology_scanner_sum_errors (topology->scanner, error);
341+
mongoc_topology_scanner_get_error (scanner, error);
342+
339343
/* "retired" nodes can be checked again in the next scan */
340-
mongoc_topology_scanner_reset (topology->scanner);
344+
mongoc_topology_scanner_reset (scanner);
341345
topology->last_scan = bson_get_monotonic_time ();
342346
topology->stale = false;
343347
}
344348

349+
static void
350+
_mongoc_server_selection_error (const char *msg,
351+
const bson_error_t *scanner_error,
352+
bson_error_t *error)
353+
{
354+
if (scanner_error && scanner_error->code) {
355+
bson_set_error (error,
356+
MONGOC_ERROR_SERVER_SELECTION,
357+
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
358+
"%s: %s",
359+
msg, scanner_error->message);
360+
} else {
361+
bson_set_error (error,
362+
MONGOC_ERROR_SERVER_SELECTION,
363+
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
364+
"%s", msg);
365+
}
366+
}
367+
345368
/*
346369
*-------------------------------------------------------------------------
347370
*
@@ -376,6 +399,8 @@ mongoc_topology_select (mongoc_topology_t *topology,
376399
const mongoc_read_prefs_t *read_prefs,
377400
bson_error_t *error)
378401
{
402+
static const char *timeout_msg =
403+
"No suitable servers found: `serverSelectionTimeoutMS` expired";
379404
int r;
380405
int64_t local_threshold_ms;
381406
mongoc_server_description_t *selected_server = NULL;
@@ -416,11 +441,10 @@ mongoc_topology_select (mongoc_topology_t *topology,
416441

417442
if (scan_ready > expire_at && !try_once) {
418443
/* selection timeout will expire before min heartbeat passes */
419-
bson_set_error(error,
420-
MONGOC_ERROR_SERVER_SELECTION,
421-
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
422-
"No suitable servers found: "
423-
"`minheartbeatfrequencyms` not reached yet");
444+
_mongoc_server_selection_error (
445+
"No suitable servers found: "
446+
"(`minHeartbeatFrequencyMS` not reached yet)",
447+
&scanner_error, error);
424448
goto FAIL;
425449
}
426450

@@ -447,31 +471,20 @@ mongoc_topology_select (mongoc_topology_t *topology,
447471

448472
if (try_once) {
449473
if (tried_once) {
450-
if (scanner_error.code) {
451-
bson_set_error(error,
452-
MONGOC_ERROR_SERVER_SELECTION,
453-
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
454-
"No suitable servers found "
455-
"(`serverselectiontryonce` set): %s", scanner_error.message);
456-
} else {
457-
bson_set_error(error,
458-
MONGOC_ERROR_SERVER_SELECTION,
459-
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
460-
"No suitable servers found "
461-
"(`serverselectiontryonce` set)");
462-
}
474+
_mongoc_server_selection_error (
475+
"No suitable servers found (`serverSelectionTryOnce` set)",
476+
&scanner_error, error);
477+
463478
goto FAIL;
464479
}
465480
} else {
466481
loop_end = bson_get_monotonic_time ();
467482

468483
if (loop_end > expire_at) {
469484
/* no time left in server_selection_timeout_msec */
470-
bson_set_error(error,
471-
MONGOC_ERROR_SERVER_SELECTION,
472-
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
473-
"No suitable servers found: "
474-
"`serverselectiontimeoutms` timed out");
485+
_mongoc_server_selection_error (timeout_msg,
486+
&scanner_error, error);
487+
475488
goto FAIL;
476489
}
477490
}
@@ -493,6 +506,7 @@ mongoc_topology_select (mongoc_topology_t *topology,
493506
r = mongoc_cond_timedwait (&topology->cond_client, &topology->mutex,
494507
(expire_at - loop_start) / 1000);
495508

509+
mongoc_topology_scanner_get_error (topology->scanner, &scanner_error);
496510
mongoc_mutex_unlock (&topology->mutex);
497511

498512
#ifdef _WIN32
@@ -501,27 +515,25 @@ mongoc_topology_select (mongoc_topology_t *topology,
501515
if (r == ETIMEDOUT) {
502516
#endif
503517
/* handle timeouts */
504-
bson_set_error(error,
505-
MONGOC_ERROR_SERVER_SELECTION,
506-
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
507-
"Timed out trying to select a server");
518+
_mongoc_server_selection_error (timeout_msg,
519+
&scanner_error, error);
520+
508521
goto FAIL;
509522
} else if (r) {
510523
bson_set_error(error,
511524
MONGOC_ERROR_SERVER_SELECTION,
512525
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
513-
"Unknown error '%d' received while waiting on thread condition",
514-
r);
526+
"Unknown error '%d' received while waiting on "
527+
"thread condition", r);
515528
goto FAIL;
516529
}
517530

518531
loop_start = bson_get_monotonic_time ();
519532

520533
if (loop_start > expire_at) {
521-
bson_set_error(error,
522-
MONGOC_ERROR_SERVER_SELECTION,
523-
MONGOC_ERROR_SERVER_SELECTION_FAILURE,
524-
"Timed out trying to select a server");
534+
_mongoc_server_selection_error (timeout_msg,
535+
&scanner_error, error);
536+
525537
goto FAIL;
526538
}
527539
} else {

0 commit comments

Comments
 (0)