diff --git a/backport-CVE-2023-4408.patch b/backport-CVE-2023-4408.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1d3971501f792e40875319942c9cb63b7ed1851 --- /dev/null +++ b/backport-CVE-2023-4408.patch @@ -0,0 +1,901 @@ +From 608707b4f5b473e416563bfe0d43e26d6dc4a5c6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= +Date: Mon, 11 Sep 2023 10:35:28 +0200 +Subject: [PATCH] Use hashtable when parsing a message + +When parsing messages use a hashtable instead of a linear search to +reduce the amount of work done in findname when there's more than one +name in the section. + +There are two hashtables: + +1) hashtable for owner names - that's constructed for each section when +we hit the second name in the section and destroyed right after parsing +that section; + +2) per-name hashtable - for each name in the section, we construct a new +hashtable for that name if there are more than one rdataset for that +particular name. + +Conflict:NA +Reference:https://downloads.isc.org/isc/bind/9.18.24/patches/0001-CVE-2023-4408.patch + +(cherry picked from commit b8a96317544c7b310b4f74360825a87b6402ddc2) + +--- + lib/dns/include/dns/message.h | 38 ---- + lib/dns/include/dns/name.h | 37 ++-- + lib/dns/message.c | 374 ++++++++++++++++++++++------------ + lib/dns/name.c | 1 + + lib/isc/ht.c | 55 ++++- + 5 files changed, 309 insertions(+), 196 deletions(-) + +diff --git a/lib/dns/include/dns/message.h b/lib/dns/include/dns/message.h +index 940c9b1..f15884a 100644 +--- a/lib/dns/include/dns/message.h ++++ b/lib/dns/include/dns/message.h +@@ -856,44 +856,6 @@ dns_message_findtype(const dns_name_t *name, dns_rdatatype_t type, + *\li #ISC_R_NOTFOUND -- the desired type does not exist. + */ + +-isc_result_t +-dns_message_find(const dns_name_t *name, dns_rdataclass_t rdclass, +- dns_rdatatype_t type, dns_rdatatype_t covers, +- dns_rdataset_t **rdataset); +-/*%< +- * Search the name for the specified rdclass and type. 
If it is found, +- * *rdataset is filled in with a pointer to that rdataset. +- * +- * Requires: +- *\li if '**rdataset' is non-NULL, *rdataset needs to be NULL. +- * +- *\li 'type' be a valid type, and NOT dns_rdatatype_any. +- * +- *\li If 'type' is dns_rdatatype_rrsig, 'covers' must be a valid type. +- * Otherwise it should be 0. +- * +- * Returns: +- *\li #ISC_R_SUCCESS -- all is well. +- *\li #ISC_R_NOTFOUND -- the desired type does not exist. +- */ +- +-void +-dns_message_movename(dns_message_t *msg, dns_name_t *name, +- dns_section_t fromsection, dns_section_t tosection); +-/*%< +- * Move a name from one section to another. +- * +- * Requires: +- * +- *\li 'msg' be valid. +- * +- *\li 'name' must be a name already in 'fromsection'. +- * +- *\li 'fromsection' must be a valid section. +- * +- *\li 'tosection' must be a valid section. +- */ +- + void + dns_message_addname(dns_message_t *msg, dns_name_t *name, + dns_section_t section); +diff --git a/lib/dns/include/dns/name.h b/lib/dns/include/dns/name.h +index a758c4d..199856a 100644 +--- a/lib/dns/include/dns/name.h ++++ b/lib/dns/include/dns/name.h +@@ -68,6 +68,7 @@ + #include + #include + ++#include + #include + #include + #include /* Required for storage size of dns_label_t. 
*/ +@@ -111,6 +112,7 @@ struct dns_name { + isc_buffer_t *buffer; + ISC_LINK(dns_name_t) link; + ISC_LIST(dns_rdataset_t) list; ++ isc_ht_t *ht; + }; + + #define DNS_NAME_MAGIC ISC_MAGIC('D', 'N', 'S', 'n') +@@ -166,30 +168,24 @@ extern const dns_name_t *dns_wildcardname; + * unsigned char offsets[] = { 0, 6 }; + * dns_name_t value = DNS_NAME_INITABSOLUTE(data, offsets); + */ +-#define DNS_NAME_INITNONABSOLUTE(A, B) \ +- { \ +- DNS_NAME_MAGIC, A, (sizeof(A) - 1), sizeof(B), \ +- DNS_NAMEATTR_READONLY, B, NULL, \ +- { (void *)-1, (void *)-1 }, { \ +- NULL, NULL \ +- } \ ++#define DNS_NAME_INITNONABSOLUTE(A, B) \ ++ { \ ++ DNS_NAME_MAGIC, A, (sizeof(A) - 1), sizeof(B), \ ++ DNS_NAMEATTR_READONLY, B, NULL, \ ++ { (void *)-1, (void *)-1 }, { NULL, NULL }, NULL \ + } + +-#define DNS_NAME_INITABSOLUTE(A, B) \ +- { \ +- DNS_NAME_MAGIC, A, sizeof(A), sizeof(B), \ +- DNS_NAMEATTR_READONLY | DNS_NAMEATTR_ABSOLUTE, B, \ +- NULL, { (void *)-1, (void *)-1 }, { \ +- NULL, NULL \ +- } \ ++#define DNS_NAME_INITABSOLUTE(A, B) \ ++ { \ ++ DNS_NAME_MAGIC, A, sizeof(A), sizeof(B), \ ++ DNS_NAMEATTR_READONLY | DNS_NAMEATTR_ABSOLUTE, B, \ ++ NULL, { (void *)-1, (void *)-1 }, { NULL, NULL }, NULL \ + } + +-#define DNS_NAME_INITEMPTY \ +- { \ +- DNS_NAME_MAGIC, NULL, 0, 0, 0, NULL, NULL, \ +- { (void *)-1, (void *)-1 }, { \ +- NULL, NULL \ +- } \ ++#define DNS_NAME_INITEMPTY \ ++ { \ ++ DNS_NAME_MAGIC, NULL, 0, 0, 0, NULL, NULL, \ ++ { (void *)-1, (void *)-1 }, { NULL, NULL }, NULL \ + } + + /*% +@@ -1330,6 +1326,7 @@ ISC_LANG_ENDDECLS + _n->buffer = NULL; \ + ISC_LINK_INIT(_n, link); \ + ISC_LIST_INIT(_n->list); \ ++ _n->ht = NULL; \ + } while (0) + + #define DNS_NAME_RESET(n) \ +diff --git a/lib/dns/message.c b/lib/dns/message.c +index 761a8e1..8654e92 100644 +--- a/lib/dns/message.c ++++ b/lib/dns/message.c +@@ -22,6 +22,8 @@ + #include + + #include ++#include ++#include + #include + #include + #include +@@ -493,9 +495,11 @@ msgresetsigs(dns_message_t *msg, bool replying) { + } else { 
+ dns_rdataset_disassociate(msg->tsig); + isc_mempool_put(msg->rdspool, msg->tsig); ++ msg->tsig = NULL; + if (msg->querytsig != NULL) { + dns_rdataset_disassociate(msg->querytsig); + isc_mempool_put(msg->rdspool, msg->querytsig); ++ msg->querytsig = NULL; + } + } + dns_message_puttempname(msg, &msg->tsigname); +@@ -790,6 +794,18 @@ dns_message_detach(dns_message_t **messagep) { + } + } + ++static isc_result_t ++name_hash_add(isc_ht_t *ht, dns_name_t *name, dns_name_t **foundp) { ++ isc_result_t result = isc_ht_find(ht, name->ndata, name->length, ++ (void **)foundp); ++ if (result == ISC_R_SUCCESS) { ++ return (ISC_R_EXISTS); ++ } ++ result = isc_ht_add(ht, name->ndata, name->length, (void *)name); ++ INSIST(result == ISC_R_SUCCESS); ++ return (ISC_R_SUCCESS); ++} ++ + static isc_result_t + findname(dns_name_t **foundname, const dns_name_t *target, + dns_namelist_t *section) { +@@ -809,29 +825,26 @@ findname(dns_name_t **foundname, const dns_name_t *target, + return (ISC_R_NOTFOUND); + } + +-isc_result_t +-dns_message_find(const dns_name_t *name, dns_rdataclass_t rdclass, +- dns_rdatatype_t type, dns_rdatatype_t covers, +- dns_rdataset_t **rdataset) { +- dns_rdataset_t *curr; +- +- REQUIRE(name != NULL); +- REQUIRE(rdataset == NULL || *rdataset == NULL); +- +- for (curr = ISC_LIST_TAIL(name->list); curr != NULL; +- curr = ISC_LIST_PREV(curr, link)) +- { +- if (curr->rdclass == rdclass && curr->type == type && +- curr->covers == covers) +- { +- if (rdataset != NULL) { +- *rdataset = curr; +- } +- return (ISC_R_SUCCESS); +- } +- } ++typedef struct __attribute__((__packed__)) rds_key { ++ dns_rdataclass_t rdclass; ++ dns_rdatatype_t type; ++ dns_rdatatype_t covers; ++} rds_key_t; + +- return (ISC_R_NOTFOUND); ++static isc_result_t ++rds_hash_add(isc_ht_t *ht, dns_rdataset_t *rds, dns_rdataset_t **foundp) { ++ rds_key_t key = { .rdclass = rds->rdclass, ++ .type = rds->type, ++ .covers = rds->covers }; ++ isc_result_t result = isc_ht_find(ht, (const unsigned char 
*)&key, ++ sizeof(key), (void **)foundp); ++ if (result == ISC_R_SUCCESS) { ++ return (ISC_R_EXISTS); ++ } ++ result = isc_ht_add(ht, (const unsigned char *)&key, sizeof(key), ++ (void *)rds); ++ INSIST(result == ISC_R_SUCCESS); ++ return (ISC_R_SUCCESS); + } + + isc_result_t +@@ -958,6 +971,18 @@ getrdata(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + } \ + } while (0) + ++static void ++cleanup_name_hashmaps(dns_namelist_t *section) { ++ dns_name_t *name = NULL; ++ for (name = ISC_LIST_HEAD(*section); name != NULL; ++ name = ISC_LIST_NEXT(name, link)) ++ { ++ if (name->ht != NULL) { ++ isc_ht_destroy(&name->ht); ++ } ++ } ++} ++ + static isc_result_t + getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + unsigned int options) { +@@ -967,13 +992,19 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + dns_name_t *name2 = NULL; + dns_rdataset_t *rdataset = NULL; + dns_rdatalist_t *rdatalist = NULL; +- isc_result_t result; ++ isc_result_t result = ISC_R_SUCCESS; + dns_rdatatype_t rdtype; + dns_rdataclass_t rdclass; + dns_namelist_t *section = &msg->sections[DNS_SECTION_QUESTION]; + bool best_effort = ((options & DNS_MESSAGEPARSE_BESTEFFORT) != 0); + bool seen_problem = false; + bool free_name = false; ++ bool free_ht = false; ++ isc_ht_t *name_map = NULL; ++ ++ if (msg->counts[DNS_SECTION_QUESTION] > 1) { ++ isc_ht_init(&name_map, msg->mctx, 1, ISC_HT_CASE_INSENSITIVE); ++ } + + for (count = 0; count < msg->counts[DNS_SECTION_QUESTION]; count++) { + name = NULL; +@@ -994,13 +1025,19 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + goto cleanup; + } + ++ /* If there is only one QNAME, skip the duplicity checks */ ++ if (name_map == NULL) { ++ result = ISC_R_SUCCESS; ++ goto skip_name_check; ++ } ++ + /* + * Run through the section, looking to see if this name + * is already there. 
If it is found, put back the allocated + * name since we no longer need it, and set our name pointer + * to point to the name we found. + */ +- result = findname(&name2, name, section); ++ result = name_hash_add(name_map, name, &name2); + + /* + * If it is the first name in the section, accept it. +@@ -1012,19 +1049,25 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + * this should be legal or not. In either case we no longer + * need this name pointer. + */ +- if (result != ISC_R_SUCCESS) { ++ skip_name_check: ++ switch (result) { ++ case ISC_R_SUCCESS: + if (!ISC_LIST_EMPTY(*section)) { + DO_ERROR(DNS_R_FORMERR); + } + ISC_LIST_APPEND(*section, name, link); +- free_name = false; +- } else { ++ break; ++ case ISC_R_EXISTS: + dns_message_puttempname(msg, &name); + name = name2; + name2 = NULL; +- free_name = false; ++ break; ++ default: ++ UNREACHABLE(); + } + ++ free_name = false; ++ + /* + * Get type and class. + */ +@@ -1054,14 +1097,6 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + msg->tkey = 1; + } + +- /* +- * Can't ask the same question twice. +- */ +- result = dns_message_find(name, rdclass, rdtype, 0, NULL); +- if (result == ISC_R_SUCCESS) { +- DO_ERROR(DNS_R_FORMERR); +- } +- + /* + * Allocate a new rdatalist. 
+ */ +@@ -1071,6 +1106,7 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + goto cleanup; + } + rdataset = isc_mempool_get(msg->rdspool); ++ dns_rdataset_init(rdataset); + + /* + * Convert rdatalist to rdataset, and attach the latter to +@@ -1078,8 +1114,6 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + */ + rdatalist->type = rdtype; + rdatalist->rdclass = rdclass; +- +- dns_rdataset_init(rdataset); + result = dns_rdatalist_tordataset(rdatalist, rdataset); + if (result != ISC_R_SUCCESS) { + goto cleanup; +@@ -1087,24 +1121,66 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + + rdataset->attributes |= DNS_RDATASETATTR_QUESTION; + ++ /* ++ * Skip the duplicity check for first rdataset ++ */ ++ if (ISC_LIST_EMPTY(name->list)) { ++ result = ISC_R_SUCCESS; ++ goto skip_rds_check; ++ } ++ ++ /* ++ * Can't ask the same question twice. ++ */ ++ if (name->ht == NULL) { ++ isc_ht_init(&name->ht, msg->mctx, 1, ++ ISC_HT_CASE_SENSITIVE); ++ free_ht = true; ++ ++ INSIST(ISC_LIST_HEAD(name->list) == ++ ISC_LIST_TAIL(name->list)); ++ ++ dns_rdataset_t *old_rdataset = ++ ISC_LIST_HEAD(name->list); ++ ++ result = rds_hash_add(name->ht, old_rdataset, NULL); ++ ++ INSIST(result == ISC_R_SUCCESS); ++ } ++ result = rds_hash_add(name->ht, rdataset, NULL); ++ if (result == ISC_R_EXISTS) { ++ DO_ERROR(DNS_R_FORMERR); ++ } ++ ++ skip_rds_check: + ISC_LIST_APPEND(name->list, rdataset, link); ++ + rdataset = NULL; + } + + if (seen_problem) { +- return (DNS_R_RECOVERABLE); ++ result = DNS_R_RECOVERABLE; + } +- return (ISC_R_SUCCESS); + + cleanup: + if (rdataset != NULL) { +- INSIST(!dns_rdataset_isassociated(rdataset)); ++ if (dns_rdataset_isassociated(rdataset)) { ++ dns_rdataset_disassociate(rdataset); ++ } + isc_mempool_put(msg->rdspool, rdataset); + } + if (free_name) { + dns_message_puttempname(msg, &name); + } + ++ if (free_ht) { ++ cleanup_name_hashmaps(section); ++ } ++ ++ if 
(name_map != NULL) { ++ isc_ht_destroy(&name_map); ++ } ++ + return (result); + } + +@@ -1184,17 +1260,24 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + dns_name_t *name = NULL; + dns_name_t *name2 = NULL; + dns_rdataset_t *rdataset = NULL; ++ dns_rdataset_t *found_rdataset = NULL; + dns_rdatalist_t *rdatalist = NULL; +- isc_result_t result; ++ isc_result_t result = ISC_R_SUCCESS; + dns_rdatatype_t rdtype, covers; + dns_rdataclass_t rdclass; + dns_rdata_t *rdata = NULL; + dns_ttl_t ttl; + dns_namelist_t *section = &msg->sections[sectionid]; +- bool free_name = false, free_rdataset = false, seen_problem = false; ++ bool free_name = false, seen_problem = false; ++ bool free_ht = false; + bool preserve_order = ((options & DNS_MESSAGEPARSE_PRESERVEORDER) != 0); + bool best_effort = ((options & DNS_MESSAGEPARSE_BESTEFFORT) != 0); + bool isedns, issigzero, istsig; ++ isc_ht_t *name_map = NULL; ++ ++ if (msg->counts[sectionid] > 1) { ++ isc_ht_init(&name_map, msg->mctx, 1, ISC_HT_CASE_INSENSITIVE); ++ } + + for (count = 0; count < msg->counts[sectionid]; count++) { + int recstart = source->current; +@@ -1202,10 +1285,10 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + + skip_name_search = false; + skip_type_search = false; +- free_rdataset = false; + isedns = false; + issigzero = false; + istsig = false; ++ found_rdataset = NULL; + + name = NULL; + result = dns_message_gettempname(msg, &name); +@@ -1245,8 +1328,8 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + if (msg->rdclass_set == 0 && + rdtype != dns_rdatatype_opt && /* class is UDP SIZE */ + rdtype != dns_rdatatype_tsig && /* class is ANY */ +- rdtype != dns_rdatatype_tkey) +- { /* class is undefined */ ++ rdtype != dns_rdatatype_tkey) /* class is undefined */ ++ { + msg->rdclass = rdclass; + msg->rdclass_set = 1; + } +@@ -1353,10 +1436,6 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, 
+ * Then put the meta-class back into the finished rdata. + */ + rdata = newrdata(msg); +- if (rdata == NULL) { +- result = ISC_R_NOMEMORY; +- goto cleanup; +- } + if (msg->opcode == dns_opcode_update && + update(sectionid, rdclass)) + { +@@ -1445,34 +1524,62 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + free_name = false; + } + } else { ++ if (name_map == NULL) { ++ result = ISC_R_SUCCESS; ++ goto skip_name_check; ++ } ++ + /* + * Run through the section, looking to see if this name + * is already there. If it is found, put back the + * allocated name since we no longer need it, and set + * our name pointer to point to the name we found. + */ +- result = findname(&name2, name, section); ++ result = name_hash_add(name_map, name, &name2); + + /* + * If it is a new name, append to the section. + */ +- if (result == ISC_R_SUCCESS) { ++ skip_name_check: ++ switch (result) { ++ case ISC_R_SUCCESS: ++ ISC_LIST_APPEND(*section, name, link); ++ break; ++ case ISC_R_EXISTS: + dns_message_puttempname(msg, &name); + name = name2; +- } else { +- ISC_LIST_APPEND(*section, name, link); ++ name2 = NULL; ++ break; ++ default: ++ UNREACHABLE(); + } + free_name = false; + } + ++ rdatalist = newrdatalist(msg); ++ rdatalist->type = rdtype; ++ rdatalist->covers = covers; ++ rdatalist->rdclass = rdclass; ++ rdatalist->ttl = ttl; ++ ++ dns_message_gettemprdataset(msg, &rdataset); ++ RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset) == ++ ISC_R_SUCCESS); ++ dns_rdataset_setownercase(rdataset, name); ++ rdatalist = NULL; ++ + /* + * Search name for the particular type and class. + * Skip this stage if in update mode or this is a meta-type. 
+ */ +- if (preserve_order || msg->opcode == dns_opcode_update || +- skip_type_search) ++ if (isedns || istsig || issigzero) { ++ /* Skip adding the rdataset to the tables */ ++ } else if (preserve_order || msg->opcode == dns_opcode_update || ++ skip_type_search) + { +- result = ISC_R_NOTFOUND; ++ result = ISC_R_SUCCESS; ++ ++ ISC_LIST_APPEND(name->list, rdataset, link); + } else { + /* + * If this is a type that can only occur in +@@ -1482,59 +1589,71 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + DO_ERROR(DNS_R_FORMERR); + } + +- rdataset = NULL; +- result = dns_message_find(name, rdclass, rdtype, covers, +- &rdataset); +- } +- +- /* +- * If we found an rdataset that matches, we need to +- * append this rdata to that set. If we did not, we need +- * to create a new rdatalist, store the important bits there, +- * convert it to an rdataset, and link the latter to the name. +- * Yuck. When appending, make certain that the type isn't +- * a singleton type, such as SOA or CNAME. +- * +- * Note that this check will be bypassed when preserving order, +- * the opcode is an update, or the type search is skipped. 
+- */ +- if (result == ISC_R_SUCCESS) { +- if (dns_rdatatype_issingleton(rdtype)) { +- dns_rdata_t *first; +- dns_rdatalist_fromrdataset(rdataset, +- &rdatalist); +- first = ISC_LIST_HEAD(rdatalist->rdata); +- INSIST(first != NULL); +- if (dns_rdata_compare(rdata, first) != 0) { +- DO_ERROR(DNS_R_FORMERR); +- } ++ if (ISC_LIST_EMPTY(name->list)) { ++ result = ISC_R_SUCCESS; ++ goto skip_rds_check; + } +- } + +- if (result == ISC_R_NOTFOUND) { +- rdataset = isc_mempool_get(msg->rdspool); +- free_rdataset = true; ++ if (name->ht == NULL) { ++ isc_ht_init(&name->ht, msg->mctx, 1, ++ ISC_HT_CASE_SENSITIVE); ++ free_ht = true; + +- rdatalist = newrdatalist(msg); +- if (rdatalist == NULL) { +- result = ISC_R_NOMEMORY; +- goto cleanup; ++ INSIST(ISC_LIST_HEAD(name->list) == ++ ISC_LIST_TAIL(name->list)); ++ ++ dns_rdataset_t *old_rdataset = ++ ISC_LIST_HEAD(name->list); ++ ++ result = rds_hash_add(name->ht, old_rdataset, ++ NULL); ++ ++ INSIST(result == ISC_R_SUCCESS); + } ++ found_rdataset = NULL; ++ result = rds_hash_add(name->ht, rdataset, ++ &found_rdataset); + +- rdatalist->type = rdtype; +- rdatalist->covers = covers; +- rdatalist->rdclass = rdclass; +- rdatalist->ttl = ttl; ++ /* ++ * If we found an rdataset that matches, we need to ++ * append this rdata to that set. If we did not, we ++ * need to create a new rdatalist, store the important ++ * bits there, convert it to an rdataset, and link the ++ * latter to the name. Yuck. When appending, make ++ * certain that the type isn't a singleton type, such as ++ * SOA or CNAME. ++ * ++ * Note that this check will be bypassed when preserving ++ * order, the opcode is an update, or the type search is ++ * skipped. 
++ */ ++ skip_rds_check: ++ switch (result) { ++ case ISC_R_EXISTS: ++ /* Free the rdataset we used as the key */ ++ dns_rdataset_disassociate(rdataset); ++ isc_mempool_put(msg->rdspool, rdataset); ++ result = ISC_R_SUCCESS; ++ rdataset = found_rdataset; + +- dns_rdataset_init(rdataset); +- RUNTIME_CHECK( +- dns_rdatalist_tordataset(rdatalist, rdataset) == +- ISC_R_SUCCESS); +- dns_rdataset_setownercase(rdataset, name); ++ if (!dns_rdatatype_issingleton(rdtype)) { ++ break; ++ } + +- if (!isedns && !istsig && !issigzero) { ++ dns_rdatalist_fromrdataset(rdataset, ++ &rdatalist); ++ dns_rdata_t *first = ++ ISC_LIST_HEAD(rdatalist->rdata); ++ INSIST(first != NULL); ++ if (dns_rdata_compare(rdata, first) != 0) { ++ DO_ERROR(DNS_R_FORMERR); ++ } ++ break; ++ case ISC_R_SUCCESS: + ISC_LIST_APPEND(name->list, rdataset, link); +- free_rdataset = false; ++ break; ++ default: ++ UNREACHABLE(); + } + } + +@@ -1569,8 +1688,6 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + dns_rcode_t ercode; + + msg->opt = rdataset; +- rdataset = NULL; +- free_rdataset = false; + ercode = (dns_rcode_t)((msg->opt->ttl & + DNS_MESSAGE_EDNSRCODE_MASK) >> + 20); +@@ -1581,8 +1698,6 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + msg->sig0 = rdataset; + msg->sig0name = name; + msg->sigstart = recstart; +- rdataset = NULL; +- free_rdataset = false; + free_name = false; + } else if (istsig) { + msg->tsig = rdataset; +@@ -1592,22 +1707,17 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + * Windows doesn't like TSIG names to be compressed. 
+ */ + msg->tsigname->attributes |= DNS_NAMEATTR_NOCOMPRESS; +- rdataset = NULL; +- free_rdataset = false; + free_name = false; + } ++ rdataset = NULL; + + if (seen_problem) { + if (free_name) { + dns_message_puttempname(msg, &name); + } +- if (free_rdataset) { +- isc_mempool_put(msg->rdspool, rdataset); +- } +- free_name = free_rdataset = false; ++ free_name = false; + } + INSIST(!free_name); +- INSIST(!free_rdataset); + } + + /* +@@ -1625,16 +1735,24 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + } + + if (seen_problem) { +- return (DNS_R_RECOVERABLE); ++ result = DNS_R_RECOVERABLE; + } +- return (ISC_R_SUCCESS); + + cleanup: ++ if (rdataset != NULL && rdataset != found_rdataset) { ++ dns_rdataset_disassociate(rdataset); ++ isc_mempool_put(msg->rdspool, rdataset); ++ } + if (free_name) { + dns_message_puttempname(msg, &name); + } +- if (free_rdataset) { +- isc_mempool_put(msg->rdspool, rdataset); ++ ++ if (free_ht) { ++ cleanup_name_hashmaps(section); ++ } ++ ++ if (name_map != NULL) { ++ isc_ht_destroy(&name_map); + } + + return (result); +@@ -2452,7 +2570,7 @@ dns_message_findname(dns_message_t *msg, dns_section_t section, + const dns_name_t *target, dns_rdatatype_t type, + dns_rdatatype_t covers, dns_name_t **name, + dns_rdataset_t **rdataset) { +- dns_name_t *foundname; ++ dns_name_t *foundname = NULL; + isc_result_t result; + + /* +@@ -2499,22 +2617,6 @@ dns_message_findname(dns_message_t *msg, dns_section_t section, + return (result); + } + +-void +-dns_message_movename(dns_message_t *msg, dns_name_t *name, +- dns_section_t fromsection, dns_section_t tosection) { +- REQUIRE(msg != NULL); +- REQUIRE(msg->from_to_wire == DNS_MESSAGE_INTENTRENDER); +- REQUIRE(name != NULL); +- REQUIRE(VALID_NAMED_SECTION(fromsection)); +- REQUIRE(VALID_NAMED_SECTION(tosection)); +- +- /* +- * Unlink the name from the old section +- */ +- ISC_LIST_UNLINK(msg->sections[fromsection], name, link); +- ISC_LIST_APPEND(msg->sections[tosection], 
name, link); +-} +- + void + dns_message_addname(dns_message_t *msg, dns_name_t *name, + dns_section_t section) { +@@ -2591,6 +2693,10 @@ dns_message_puttempname(dns_message_t *msg, dns_name_t **itemp) { + REQUIRE(!ISC_LINK_LINKED(item, link)); + REQUIRE(ISC_LIST_HEAD(item->list) == NULL); + ++ if (item->ht != NULL) { ++ isc_ht_destroy(&item->ht); ++ } ++ + /* + * we need to check this in case dns_name_dup() was used. + */ +diff --git a/lib/dns/name.c b/lib/dns/name.c +index 8a258a2..90044ba 100644 +--- a/lib/dns/name.c ++++ b/lib/dns/name.c +@@ -188,6 +188,7 @@ dns_name_invalidate(dns_name_t *name) { + name->offsets = NULL; + name->buffer = NULL; + ISC_LINK_INIT(name, link); ++ INSIST(name->ht == NULL); + } + + bool +diff --git a/lib/isc/ht.c b/lib/isc/ht.c +index eaf2b3c..e11050f 100644 +--- a/lib/isc/ht.c ++++ b/lib/isc/ht.c +@@ -93,11 +93,54 @@ maybe_rehash(isc_ht_t *ht, size_t newcount); + static isc_result_t + isc__ht_iter_next(isc_ht_iter_t *it); + ++static uint8_t maptolower[] = { ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, ++ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, ++ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, ++ 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, ++ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, ++ 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, ++ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, ++ 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, ++ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, ++ 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, ++ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, ++ 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, ++ 0x9c, 0x9d, 0x9e, 
0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, ++ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, ++ 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, ++ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, ++ 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, ++ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, ++ 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, ++ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, ++ 0xfc, 0xfd, 0xfe, 0xff ++}; ++ ++static int ++memcasecmp(const void *vs1, const void *vs2, size_t len) { ++ uint8_t const *s1 = vs1; ++ uint8_t const *s2 = vs2; ++ for (size_t i = 0; i < len; i++) { ++ uint8_t u1 = s1[i]; ++ uint8_t u2 = s2[i]; ++ int U1 = maptolower[u1]; ++ int U2 = maptolower[u2]; ++ int diff = U1 - U2; ++ if (diff) { ++ return diff; ++ } ++ } ++ return 0; ++} ++ + static bool + isc__ht_node_match(isc_ht_node_t *node, const uint32_t hashval, +- const uint8_t *key, uint32_t keysize) { ++ const uint8_t *key, uint32_t keysize, bool case_sensitive) { + return (node->hashval == hashval && node->keysize == keysize && +- memcmp(node->key, key, keysize) == 0); ++ (case_sensitive ? 
(memcmp(node->key, key, keysize) == 0) ++ : (memcasecmp(node->key, key, keysize) == 0))); + } + + static uint32_t +@@ -341,7 +384,9 @@ nexttable: + for (isc_ht_node_t *node = ht->table[findex][hash]; node != NULL; + node = node->next) + { +- if (isc__ht_node_match(node, hashval, key, keysize)) { ++ if (isc__ht_node_match(node, hashval, key, keysize, ++ ht->case_sensitive)) ++ { + return (node); + } + } +@@ -390,7 +435,9 @@ isc__ht_delete(isc_ht_t *ht, const unsigned char *key, const uint32_t keysize, + for (isc_ht_node_t *node = ht->table[idx][hash]; node != NULL; + prev = node, node = node->next) + { +- if (isc__ht_node_match(node, hashval, key, keysize)) { ++ if (isc__ht_node_match(node, hashval, key, keysize, ++ ht->case_sensitive)) ++ { + if (prev == NULL) { + ht->table[idx][hash] = node->next; + } else { +-- +2.33.0 + diff --git a/backport-CVE-2023-50387-CVE-2023-50868.patch b/backport-CVE-2023-50387-CVE-2023-50868.patch new file mode 100644 index 0000000000000000000000000000000000000000..08472b4da4825b12703fba7bb846f0a4afdd7dc1 --- /dev/null +++ b/backport-CVE-2023-50387-CVE-2023-50868.patch @@ -0,0 +1,600 @@ +From c12608ca934c0433d280e65fe6c631013e200cfe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= +Date: Thu, 11 Jan 2024 12:03:24 +0100 +Subject: [PATCH] Split fast and slow task queues + +Change the taskmgr (and thus netmgr) in a way that it supports fast and +slow task queues. The fast queue is used for incoming DNS traffic and +it will pass the processing to the slow queue for sending outgoing DNS +messages and processing resolver messages. + +In the future, more tasks might get moved to the slow queues, so the +cached and authoritative DNS traffic can be handled without being slowed +down by operations that take longer time to process. 
+ +Conflict:NA +Reference:https://downloads.isc.org/isc/bind/9.18.24/patches/0004-CVE-2023-50387-CVE-2023-50868.patch + +(cherry picked from commit 1b3b0cef224e7a9e8279c5cfe2f7e188e3777cc7) +--- + lib/dns/dst_api.c | 27 +++++++++---- + lib/dns/include/dns/validator.h | 1 + + lib/dns/include/dst/dst.h | 4 ++ + lib/dns/resolver.c | 4 +- + lib/dns/validator.c | 67 +++++++++++++++------------------ + lib/isc/include/isc/netmgr.h | 3 ++ + lib/isc/netmgr/http.c | 18 ++++----- + lib/isc/netmgr/netmgr-int.h | 1 + + lib/isc/netmgr/netmgr.c | 38 ++++++++++++------- + lib/isc/netmgr/tcp.c | 6 +-- + lib/isc/netmgr/tcpdns.c | 4 +- + lib/isc/netmgr/tlsdns.c | 4 +- + lib/isc/netmgr/tlsstream.c | 12 +++--- + lib/isc/netmgr/udp.c | 6 +-- + 14 files changed, 109 insertions(+), 86 deletions(-) + +diff --git a/lib/dns/dst_api.c b/lib/dns/dst_api.c +index 4ffda8b..0658c69 100644 +--- a/lib/dns/dst_api.c ++++ b/lib/dns/dst_api.c +@@ -164,7 +164,8 @@ computeid(dst_key_t *key); + static isc_result_t + frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, + unsigned int protocol, dns_rdataclass_t rdclass, +- isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp); ++ isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, ++ dst_key_t **keyp); + + static isc_result_t + algorithm_status(unsigned int alg); +@@ -753,6 +754,13 @@ dst_key_todns(const dst_key_t *key, isc_buffer_t *target) { + isc_result_t + dst_key_fromdns(const dns_name_t *name, dns_rdataclass_t rdclass, + isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp) { ++ return (dst_key_fromdns_ex(name, rdclass, source, mctx, false, keyp)); ++} ++ ++isc_result_t ++dst_key_fromdns_ex(const dns_name_t *name, dns_rdataclass_t rdclass, ++ isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, ++ dst_key_t **keyp) { + uint8_t alg, proto; + uint32_t flags, extflags; + dst_key_t *key = NULL; +@@ -783,7 +791,7 @@ dst_key_fromdns(const dns_name_t *name, dns_rdataclass_t rdclass, + } + + result = frombuffer(name, alg, flags, 
proto, rdclass, source, mctx, +- &key); ++ no_rdata, &key); + if (result != ISC_R_SUCCESS) { + return (result); + } +@@ -804,7 +812,7 @@ dst_key_frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, + REQUIRE(dst_initialized); + + result = frombuffer(name, alg, flags, protocol, rdclass, source, mctx, +- &key); ++ false, &key); + if (result != ISC_R_SUCCESS) { + return (result); + } +@@ -2351,7 +2359,8 @@ computeid(dst_key_t *key) { + static isc_result_t + frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, + unsigned int protocol, dns_rdataclass_t rdclass, +- isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp) { ++ isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, ++ dst_key_t **keyp) { + dst_key_t *key; + isc_result_t ret; + +@@ -2376,10 +2385,12 @@ frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, + return (DST_R_UNSUPPORTEDALG); + } + +- ret = key->func->fromdns(key, source); +- if (ret != ISC_R_SUCCESS) { +- dst_key_free(&key); +- return (ret); ++ if (!no_rdata) { ++ ret = key->func->fromdns(key, source); ++ if (ret != ISC_R_SUCCESS) { ++ dst_key_free(&key); ++ return (ret); ++ } + } + } + +diff --git a/lib/dns/include/dns/validator.h b/lib/dns/include/dns/validator.h +index 383dcb4..352a60a 100644 +--- a/lib/dns/include/dns/validator.h ++++ b/lib/dns/include/dns/validator.h +@@ -148,6 +148,7 @@ struct dns_validator { + unsigned int depth; + unsigned int authcount; + unsigned int authfail; ++ bool failed; + isc_stdtime_t start; + }; + +diff --git a/lib/dns/include/dst/dst.h b/lib/dns/include/dst/dst.h +index ca292b0..f845e9b 100644 +--- a/lib/dns/include/dst/dst.h ++++ b/lib/dns/include/dst/dst.h +@@ -482,6 +482,10 @@ dst_key_tofile(const dst_key_t *key, int type, const char *directory); + */ + + isc_result_t ++dst_key_fromdns_ex(const dns_name_t *name, dns_rdataclass_t rdclass, ++ isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, ++ dst_key_t **keyp); ++isc_result_t + 
dst_key_fromdns(const dns_name_t *name, dns_rdataclass_t rdclass, + isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp); + /*%< +diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c +index 4b3d1c0..60cac29 100644 +--- a/lib/dns/resolver.c ++++ b/lib/dns/resolver.c +@@ -10408,8 +10408,8 @@ dns_resolver_create(dns_view_t *view, isc_taskmgr_t *taskmgr, + * Since we have a pool of tasks we bind them to task + * queues to spread the load evenly + */ +- result = isc_task_create_bound(taskmgr, 0, +- &res->buckets[i].task, i); ++ result = isc_task_create_bound( ++ taskmgr, 0, &res->buckets[i].task, ISC_NM_TASK_SLOW(i)); + if (result != ISC_R_SUCCESS) { + ntasks = i; + isc_mutex_destroy(&res->buckets[i].lock); +diff --git a/lib/dns/validator.c b/lib/dns/validator.c +index 56a0ced..47c4813 100644 +--- a/lib/dns/validator.c ++++ b/lib/dns/validator.c +@@ -1104,8 +1104,8 @@ create_validator(dns_validator_t *val, dns_name_t *name, dns_rdatatype_t type, + * 'rdataset'. If found, build a dst_key_t for it and point val->key at + * it. + * +- * If val->key is already non-NULL, locate it in the rdataset and then +- * search past it for the *next* key that could have signed 'siginfo', then ++ * If val->key is already non-NULL, start searching from the next position in ++ * 'rdataset' to find the *next* key that could have signed 'siginfo', then + * set val->key to that. 
+ * + * Returns ISC_R_SUCCESS if a possible matching key has been found, +@@ -1118,59 +1118,59 @@ select_signing_key(dns_validator_t *val, dns_rdataset_t *rdataset) { + isc_buffer_t b; + dns_rdata_t rdata = DNS_RDATA_INIT; + dst_key_t *oldkey = val->key; +- bool foundold; ++ bool no_rdata = false; + + if (oldkey == NULL) { +- foundold = true; ++ result = dns_rdataset_first(rdataset); + } else { +- foundold = false; ++ dst_key_free(&oldkey); + val->key = NULL; ++ result = dns_rdataset_next(rdataset); + } +- +- result = dns_rdataset_first(rdataset); + if (result != ISC_R_SUCCESS) { +- goto failure; ++ goto done; + } ++ + do { + dns_rdataset_current(rdataset, &rdata); + + isc_buffer_init(&b, rdata.data, rdata.length); + isc_buffer_add(&b, rdata.length); + INSIST(val->key == NULL); +- result = dst_key_fromdns(&siginfo->signer, rdata.rdclass, &b, +- val->view->mctx, &val->key); ++ result = dst_key_fromdns_ex(&siginfo->signer, rdata.rdclass, &b, ++ val->view->mctx, no_rdata, ++ &val->key); + if (result == ISC_R_SUCCESS) { + if (siginfo->algorithm == + (dns_secalg_t)dst_key_alg(val->key) && + siginfo->keyid == + (dns_keytag_t)dst_key_id(val->key) && ++ (dst_key_flags(val->key) & DNS_KEYFLAG_REVOKE) == ++ 0 && + dst_key_iszonekey(val->key)) + { +- if (foundold) { +- /* +- * This is the key we're looking for. +- */ +- return (ISC_R_SUCCESS); +- } else if (dst_key_compare(oldkey, val->key)) { +- foundold = true; +- dst_key_free(&oldkey); ++ if (no_rdata) { ++ /* Retry with full key */ ++ dns_rdata_reset(&rdata); ++ dst_key_free(&val->key); ++ no_rdata = false; ++ continue; + } ++ /* This is the key we're looking for. 
*/ ++ goto done; + } + dst_key_free(&val->key); + } + dns_rdata_reset(&rdata); + result = dns_rdataset_next(rdataset); ++ no_rdata = true; + } while (result == ISC_R_SUCCESS); + ++done: + if (result == ISC_R_NOMORE) { + result = ISC_R_NOTFOUND; + } + +-failure: +- if (oldkey != NULL) { +- dst_key_free(&oldkey); +- } +- + return (result); + } + +@@ -1589,20 +1589,9 @@ validate_answer(dns_validator_t *val, bool resume) { + continue; + } + +- do { +- isc_result_t tresult; +- vresult = verify(val, val->key, &rdata, +- val->siginfo->keyid); +- if (vresult == ISC_R_SUCCESS) { +- break; +- } +- +- tresult = select_signing_key(val, val->keyset); +- if (tresult != ISC_R_SUCCESS) { +- break; +- } +- } while (1); ++ vresult = verify(val, val->key, &rdata, val->siginfo->keyid); + if (vresult != ISC_R_SUCCESS) { ++ val->failed = true; + validator_log(val, ISC_LOG_DEBUG(3), + "failed to verify rdataset"); + } else { +@@ -1639,9 +1628,13 @@ validate_answer(dns_validator_t *val, bool resume) { + } else { + validator_log(val, ISC_LOG_DEBUG(3), + "verify failure: %s", +- isc_result_totext(result)); ++ isc_result_totext(vresult)); + resume = false; + } ++ if (val->failed) { ++ result = ISC_R_NOMORE; ++ break; ++ } + } + if (result != ISC_R_NOMORE) { + validator_log(val, ISC_LOG_DEBUG(3), +diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h +index eff33f6..d42cfe9 100644 +--- a/lib/isc/include/isc/netmgr.h ++++ b/lib/isc/include/isc/netmgr.h +@@ -750,6 +750,9 @@ isc_nm_verify_tls_peer_result_string(const isc_nmhandle_t *handle); + * \li 'handle' is a valid netmgr handle object. 
+ */ + ++#define ISC_NM_TASK_SLOW_OFFSET -2 ++#define ISC_NM_TASK_SLOW(i) (ISC_NM_TASK_SLOW_OFFSET - 1 - i) ++ + void + isc_nm_task_enqueue(isc_nm_t *mgr, isc_task_t *task, int threadid); + /*%< +diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c +index d7a33d5..2220edf 100644 +--- a/lib/isc/netmgr/http.c ++++ b/lib/isc/netmgr/http.c +@@ -2969,7 +2969,7 @@ isc__nm_http_set_max_streams(isc_nmsocket_t *listener, + void + isc_nm_http_set_endpoints(isc_nmsocket_t *listener, + isc_nm_http_endpoints_t *eps) { +- size_t nworkers; ++ size_t nlisteners; + + REQUIRE(VALID_NMSOCK(listener)); + REQUIRE(listener->type == isc_nm_httplistener); +@@ -2977,8 +2977,8 @@ isc_nm_http_set_endpoints(isc_nmsocket_t *listener, + + atomic_store(&eps->in_use, true); + +- nworkers = (size_t)listener->mgr->nworkers; +- for (size_t i = 0; i < nworkers; i++) { ++ nlisteners = (size_t)listener->mgr->nlisteners; ++ for (size_t i = 0; i < nlisteners; i++) { + isc__netievent__http_eps_t *ievent = + isc__nm_get_netievent_httpendpoints(listener->mgr, + listener, eps); +@@ -3003,20 +3003,20 @@ isc__nm_async_httpendpoints(isc__networker_t *worker, isc__netievent_t *ev0) { + static void + http_init_listener_endpoints(isc_nmsocket_t *listener, + isc_nm_http_endpoints_t *epset) { +- size_t nworkers; ++ size_t nlisteners; + + REQUIRE(VALID_NMSOCK(listener)); + REQUIRE(VALID_NM(listener->mgr)); + REQUIRE(VALID_HTTP_ENDPOINTS(epset)); + +- nworkers = (size_t)listener->mgr->nworkers; +- INSIST(nworkers > 0); ++ nlisteners = (size_t)listener->mgr->nlisteners; ++ INSIST(nlisteners > 0); + + listener->h2.listener_endpoints = + isc_mem_get(listener->mgr->mctx, +- sizeof(isc_nm_http_endpoints_t *) * nworkers); +- listener->h2.n_listener_endpoints = nworkers; +- for (size_t i = 0; i < nworkers; i++) { ++ sizeof(isc_nm_http_endpoints_t *) * nlisteners); ++ listener->h2.n_listener_endpoints = nlisteners; ++ for (size_t i = 0; i < nlisteners; i++) { + listener->h2.listener_endpoints[i] = NULL; + 
isc_nm_http_endpoints_attach( + epset, &listener->h2.listener_endpoints[i]); +diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h +index 364a933..6aca9ab 100644 +--- a/lib/isc/netmgr/netmgr-int.h ++++ b/lib/isc/netmgr/netmgr-int.h +@@ -776,6 +776,7 @@ struct isc_nm { + isc_refcount_t references; + isc_mem_t *mctx; + int nworkers; ++ int nlisteners; + isc_mutex_t lock; + isc_condition_t wkstatecond; + isc_condition_t wkpausecond; +diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c +index b19d468..2310b4b 100644 +--- a/lib/isc/netmgr/netmgr.c ++++ b/lib/isc/netmgr/netmgr.c +@@ -189,12 +189,12 @@ isc__nm_force_tid(int tid) { + } + + static void +-isc__nm_threadpool_initialize(uint32_t workers) { ++isc__nm_threadpool_initialize(uint32_t nworkers) { + char buf[11]; + int r = uv_os_getenv("UV_THREADPOOL_SIZE", buf, + &(size_t){ sizeof(buf) }); + if (r == UV_ENOENT) { +- snprintf(buf, sizeof(buf), "%" PRIu32, workers); ++ snprintf(buf, sizeof(buf), "%" PRIu32, nworkers); + uv_os_setenv("UV_THREADPOOL_SIZE", buf); + } + } +@@ -212,11 +212,11 @@ isc__nm_threadpool_initialize(uint32_t workers) { + #endif + + void +-isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { ++isc__netmgr_create(isc_mem_t *mctx, uint32_t nworkers, isc_nm_t **netmgrp) { + isc_nm_t *mgr = NULL; + char name[32]; + +- REQUIRE(workers > 0); ++ REQUIRE(nworkers > 0); + + #ifdef MAXIMAL_UV_VERSION + if (uv_version() > MAXIMAL_UV_VERSION) { +@@ -234,10 +234,13 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { + uv_version_string(), UV_VERSION_STRING); + } + +- isc__nm_threadpool_initialize(workers); ++ isc__nm_threadpool_initialize(nworkers); + + mgr = isc_mem_get(mctx, sizeof(*mgr)); +- *mgr = (isc_nm_t){ .nworkers = workers }; ++ *mgr = (isc_nm_t){ ++ .nworkers = nworkers * 2, ++ .nlisteners = nworkers, ++ }; + + isc_mem_attach(mctx, &mgr->mctx); + isc_mutex_init(&mgr->lock); +@@ -272,11 +275,12 @@ 
isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { + atomic_init(&mgr->keepalive, 30000); + atomic_init(&mgr->advertised, 30000); + +- isc_barrier_init(&mgr->pausing, workers); +- isc_barrier_init(&mgr->resuming, workers); ++ isc_barrier_init(&mgr->pausing, mgr->nworkers); ++ isc_barrier_init(&mgr->resuming, mgr->nworkers); + +- mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t)); +- for (size_t i = 0; i < workers; i++) { ++ mgr->workers = isc_mem_get(mctx, ++ mgr->nworkers * sizeof(isc__networker_t)); ++ for (int i = 0; i < mgr->nworkers; i++) { + isc__networker_t *worker = &mgr->workers[i]; + int r; + +@@ -310,7 +314,7 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { + mgr->workers_running++; + isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread); + +- snprintf(name, sizeof(name), "isc-net-%04zu", i); ++ snprintf(name, sizeof(name), "isc-net-%04d", i); + isc_thread_setname(worker->thread, name); + } + +@@ -817,9 +821,15 @@ isc_nm_task_enqueue(isc_nm_t *nm, isc_task_t *task, int threadid) { + isc__networker_t *worker = NULL; + + if (threadid == -1) { +- tid = (int)isc_random_uniform(nm->nworkers); ++ tid = (int)isc_random_uniform(nm->nlisteners); ++ } else if (threadid == ISC_NM_TASK_SLOW_OFFSET) { ++ tid = nm->nlisteners + ++ (int)isc_random_uniform(nm->nworkers - nm->nlisteners); ++ } else if (threadid < ISC_NM_TASK_SLOW_OFFSET) { ++ tid = nm->nlisteners + (ISC_NM_TASK_SLOW(threadid) % ++ (nm->nworkers - nm->nlisteners)); + } else { +- tid = threadid % nm->nworkers; ++ tid = threadid % nm->nlisteners; + } + + worker = &nm->workers[tid]; +@@ -3778,7 +3788,7 @@ isc__nm_async_settlsctx(isc__networker_t *worker, isc__netievent_t *ev0) { + static void + set_tlsctx_workers(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx) { + /* Update the TLS context reference for every worker thread. 
*/ +- for (size_t i = 0; i < (size_t)listener->mgr->nworkers; i++) { ++ for (size_t i = 0; i < (size_t)listener->mgr->nlisteners; i++) { + isc__netievent__tlsctx_t *ievent = + isc__nm_get_netievent_settlsctx(listener->mgr, listener, + tlsctx); +diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c +index 2a644fe..16b53cc 100644 +--- a/lib/isc/netmgr/tcp.c ++++ b/lib/isc/netmgr/tcp.c +@@ -341,7 +341,7 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, + isc__nm_connectcb(sock, req, result, false); + } else { + isc__nmsocket_clearcb(sock); +- sock->tid = isc_random_uniform(mgr->nworkers); ++ sock->tid = isc_random_uniform(mgr->nlisteners); + isc__nm_connectcb(sock, req, result, true); + } + atomic_store(&sock->closed, true); +@@ -362,7 +362,7 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, + isc__nm_put_netievent_tcpconnect(mgr, ievent); + } else { + atomic_init(&sock->active, false); +- sock->tid = isc_random_uniform(mgr->nworkers); ++ sock->tid = isc_random_uniform(mgr->nlisteners); + isc__nm_enqueue_ievent(&mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + } +@@ -457,7 +457,7 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, + isc__nmsocket_init(sock, mgr, isc_nm_tcplistener, iface); + + atomic_init(&sock->rchildren, 0); +- sock->nchildren = mgr->nworkers; ++ sock->nchildren = mgr->nlisteners; + children_size = sock->nchildren * sizeof(sock->children[0]); + sock->children = isc_mem_get(mgr->mctx, children_size); + memset(sock->children, 0, children_size); +diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c +index eda6aa6..46958d0 100644 +--- a/lib/isc/netmgr/tcpdns.c ++++ b/lib/isc/netmgr/tcpdns.c +@@ -324,7 +324,7 @@ isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, + isc__nm_put_netievent_tcpdnsconnect(mgr, ievent); + } else { + atomic_init(&sock->active, false); +- sock->tid = isc_random_uniform(mgr->nworkers); ++ sock->tid = 
isc_random_uniform(mgr->nlisteners); + isc__nm_enqueue_ievent(&mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + } +@@ -422,7 +422,7 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, + isc__nmsocket_init(sock, mgr, isc_nm_tcpdnslistener, iface); + + atomic_init(&sock->rchildren, 0); +- sock->nchildren = mgr->nworkers; ++ sock->nchildren = mgr->nlisteners; + children_size = sock->nchildren * sizeof(sock->children[0]); + sock->children = isc_mem_get(mgr->mctx, children_size); + memset(sock->children, 0, children_size); +diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c +index d30e33f..40e6fc8 100644 +--- a/lib/isc/netmgr/tlsdns.c ++++ b/lib/isc/netmgr/tlsdns.c +@@ -419,7 +419,7 @@ isc_nm_tlsdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, + isc__nm_put_netievent_tlsdnsconnect(mgr, ievent); + } else { + atomic_init(&sock->active, false); +- sock->tid = isc_random_uniform(mgr->nworkers); ++ sock->tid = isc_random_uniform(mgr->nlisteners); + isc__nm_enqueue_ievent(&mgr->workers[sock->tid], + (isc__netievent_t *)ievent); + } +@@ -532,7 +532,7 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, + isc__nmsocket_init(sock, mgr, isc_nm_tlsdnslistener, iface); + + atomic_init(&sock->rchildren, 0); +- sock->nchildren = mgr->nworkers; ++ sock->nchildren = mgr->nlisteners; + children_size = sock->nchildren * sizeof(sock->children[0]); + sock->children = isc_mem_get(mgr->mctx, children_size); + memset(sock->children, 0, children_size); +diff --git a/lib/isc/netmgr/tlsstream.c b/lib/isc/netmgr/tlsstream.c +index 7b49071..a3fc6d2 100644 +--- a/lib/isc/netmgr/tlsstream.c ++++ b/lib/isc/netmgr/tlsstream.c +@@ -1264,18 +1264,18 @@ isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle) { + + static void + tls_init_listener_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *ctx) { +- size_t nworkers; ++ size_t nlisteners; + + REQUIRE(VALID_NM(listener->mgr)); + REQUIRE(ctx != NULL); + +- nworkers = 
(size_t)listener->mgr->nworkers; +- INSIST(nworkers > 0); ++ nlisteners = (size_t)listener->mgr->nlisteners; ++ INSIST(nlisteners > 0); + + listener->tlsstream.listener_tls_ctx = isc_mem_get( +- listener->mgr->mctx, sizeof(isc_tlsctx_t *) * nworkers); +- listener->tlsstream.n_listener_tls_ctx = nworkers; +- for (size_t i = 0; i < nworkers; i++) { ++ listener->mgr->mctx, sizeof(isc_tlsctx_t *) * nlisteners); ++ listener->tlsstream.n_listener_tls_ctx = nlisteners; ++ for (size_t i = 0; i < nlisteners; i++) { + listener->tlsstream.listener_tls_ctx[i] = NULL; + isc_tlsctx_attach(ctx, + &listener->tlsstream.listener_tls_ctx[i]); +diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c +index 476c799..661de96 100644 +--- a/lib/isc/netmgr/udp.c ++++ b/lib/isc/netmgr/udp.c +@@ -157,14 +157,14 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, + REQUIRE(VALID_NM(mgr)); + + /* +- * We are creating mgr->nworkers duplicated sockets, one ++ * We are creating mgr->nlisteners duplicated sockets, one + * socket for each worker thread. 
+ */ + sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t)); + isc__nmsocket_init(sock, mgr, isc_nm_udplistener, iface); + + atomic_init(&sock->rchildren, 0); +- sock->nchildren = mgr->nworkers; ++ sock->nchildren = mgr->nlisteners; + children_size = sock->nchildren * sizeof(sock->children[0]); + sock->children = isc_mem_get(mgr->mctx, children_size); + memset(sock->children, 0, children_size); +@@ -1037,7 +1037,7 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, + isc__nm_put_netievent_udpconnect(mgr, event); + } else { + atomic_init(&sock->active, false); +- sock->tid = isc_random_uniform(mgr->nworkers); ++ sock->tid = isc_random_uniform(mgr->nlisteners); + isc__nm_enqueue_ievent(&mgr->workers[sock->tid], + (isc__netievent_t *)event); + } +-- +2.33.0 + diff --git a/backport-CVE-2023-5517.patch b/backport-CVE-2023-5517.patch new file mode 100644 index 0000000000000000000000000000000000000000..4f7f1bbbdfcb8d7ceb1f2d98d0ee34476875dc96 --- /dev/null +++ b/backport-CVE-2023-5517.patch @@ -0,0 +1,108 @@ +From c73262493658cb8623927ef6cc2f023501f7e809 Mon Sep 17 00:00:00 2001 +From: Mark Andrews +Date: Tue, 10 Oct 2023 10:58:18 +1100 +Subject: [PATCH] Save the correct result value to resume with + nxdomain-redirect + +The wrong result value was being saved for resumption with +nxdomain-redirect when performing the fetch. This led to an assert +when checking that RFC 1918 reverse queries were not leaking to +the global internet. 
+ +Conflict:NA +Reference:https://downloads.isc.org/isc/bind/9.18.24/patches/0002-CVE-2023-5517.patch + +(cherry picked from commit 9d0fa07c5e7a39db89862a4f843d2190059afb4b) +--- + lib/ns/query.c | 22 ++++++++++------------ + 1 file changed, 10 insertions(+), 12 deletions(-) + +diff --git a/lib/ns/query.c b/lib/ns/query.c +index c1e9148..61749c8 100644 +--- a/lib/ns/query.c ++++ b/lib/ns/query.c +@@ -465,10 +465,10 @@ static void + query_addnxrrsetnsec(query_ctx_t *qctx); + + static isc_result_t +-query_nxdomain(query_ctx_t *qctx, isc_result_t res); ++query_nxdomain(query_ctx_t *qctx, isc_result_t result); + + static isc_result_t +-query_redirect(query_ctx_t *qctx); ++query_redirect(query_ctx_t *qctx, isc_result_t result); + + static isc_result_t + query_ncache(query_ctx_t *qctx, isc_result_t result); +@@ -7718,8 +7718,7 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) { + * result from the search. + */ + static isc_result_t +-query_gotanswer(query_ctx_t *qctx, isc_result_t res) { +- isc_result_t result = res; ++query_gotanswer(query_ctx_t *qctx, isc_result_t result) { + char errmsg[256]; + + CCTRACE(ISC_LOG_DEBUG(3), "query_gotanswer"); +@@ -7795,7 +7794,7 @@ root_key_sentinel: + return (query_coveringnsec(qctx)); + + case DNS_R_NCACHENXDOMAIN: +- result = query_redirect(qctx); ++ result = query_redirect(qctx, result); + if (result != ISC_R_COMPLETE) { + return (result); + } +@@ -9612,11 +9611,10 @@ query_addnxrrsetnsec(query_ctx_t *qctx) { + * Handle NXDOMAIN and empty wildcard responses. 
+ */ + static isc_result_t +-query_nxdomain(query_ctx_t *qctx, isc_result_t res) { ++query_nxdomain(query_ctx_t *qctx, isc_result_t result) { + dns_section_t section; + uint32_t ttl; +- isc_result_t result = res; +- bool empty_wild = (res == DNS_R_EMPTYWILD); ++ bool empty_wild = (result == DNS_R_EMPTYWILD); + + CCTRACE(ISC_LOG_DEBUG(3), "query_nxdomain"); + +@@ -9625,7 +9623,7 @@ query_nxdomain(query_ctx_t *qctx, isc_result_t res) { + INSIST(qctx->is_zone || REDIRECT(qctx->client)); + + if (!empty_wild) { +- result = query_redirect(qctx); ++ result = query_redirect(qctx, result); + if (result != ISC_R_COMPLETE) { + return (result); + } +@@ -9713,7 +9711,7 @@ cleanup: + * redirecting, so query processing should continue past it. + */ + static isc_result_t +-query_redirect(query_ctx_t *qctx) { ++query_redirect(query_ctx_t *qctx, isc_result_t saved_result) { + isc_result_t result; + + CCTRACE(ISC_LOG_DEBUG(3), "query_redirect"); +@@ -9754,7 +9752,7 @@ query_redirect(query_ctx_t *qctx) { + SAVE(qctx->client->query.redirect.rdataset, qctx->rdataset); + SAVE(qctx->client->query.redirect.sigrdataset, + qctx->sigrdataset); +- qctx->client->query.redirect.result = DNS_R_NCACHENXDOMAIN; ++ qctx->client->query.redirect.result = saved_result; + dns_name_copy(qctx->fname, qctx->client->query.redirect.fname); + qctx->client->query.redirect.authoritative = + qctx->authoritative; +@@ -10415,7 +10413,7 @@ query_coveringnsec(query_ctx_t *qctx) { + * We now have the proof that we have an NXDOMAIN. Apply + * NXDOMAIN redirection if configured. 
+ */ +- result = query_redirect(qctx); ++ result = query_redirect(qctx, DNS_R_COVERINGNSEC); + if (result != ISC_R_COMPLETE) { + redirected = true; + goto cleanup; +-- +2.33.0 + diff --git a/backport-CVE-2023-5679.patch b/backport-CVE-2023-5679.patch new file mode 100644 index 0000000000000000000000000000000000000000..d593b35afeed6d02aa95308c7da8db1c409a3385 --- /dev/null +++ b/backport-CVE-2023-5679.patch @@ -0,0 +1,38 @@ +From 7db2796507127b40e2f091dafb842c6a7e86b9a8 Mon Sep 17 00:00:00 2001 +From: Mark Andrews +Date: Thu, 12 Oct 2023 12:01:46 +1100 +Subject: [PATCH] Restore dns64 state during serve-stale processing + +If we are in the process of looking for the A records as part of +dns64 processing and the serve-stale timeout triggers, redo the +dns64 changes that had been made to the original qctx. + +Conflict:NA +Reference:https://downloads.isc.org/isc/bind/9.18.24/patches/0003-CVE-2023-5679.patch + +(cherry picked from commit 1fcc483df13e049b96f620e515f0d4d45f3680b7) +--- + lib/ns/query.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/lib/ns/query.c b/lib/ns/query.c +index 61749c8..40e1232 100644 +--- a/lib/ns/query.c ++++ b/lib/ns/query.c +@@ -6228,6 +6228,13 @@ query_lookup_stale(ns_client_t *client) { + query_ctx_t qctx; + + qctx_init(client, NULL, client->query.qtype, &qctx); ++ if (DNS64(client)) { ++ qctx.qtype = qctx.type = dns_rdatatype_a; ++ qctx.dns64 = true; ++ } ++ if (DNS64EXCLUDE(client)) { ++ qctx.dns64_exclude = true; ++ } + dns_db_attach(client->view->cachedb, &qctx.db); + client->query.attributes &= ~NS_QUERYATTR_RECURSIONOK; + client->query.dboptions |= DNS_DBFIND_STALETIMEOUT; +-- +2.33.0 + diff --git a/backport-CVE-2024-0760.patch b/backport-CVE-2024-0760.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e3360f864e9111b521ca2a29df41f7d9f9b127c --- /dev/null +++ b/backport-CVE-2024-0760.patch @@ -0,0 +1,981 @@ +From c33b3d26f695d342af3fa81ab404a366bb8ce873 Mon Sep 17 00:00:00 2001 +From: Artem 
Boldariev +Date: Wed, 3 Jul 2024 13:58:32 +0300 +Subject: [PATCH] TCP/TLS DNS: unthrottle only when all input data processing + +This commit ensures that we restart reading only when all DNS data in +the input buffer is processed so that we will not get into the +situation when the buffer is overrun. + +Conflict:NA +Reference:https://downloads.isc.org/isc/bind9/9.18.28/patches/0001-CVE-2024-0760.patch + +--- + lib/isc/netmgr/netmgr-int.h | 27 +++++-- + lib/isc/netmgr/netmgr.c | 79 ++++++++++++++---- + lib/isc/netmgr/tcp.c | 71 +++++++++++++++- + lib/isc/netmgr/tcpdns.c | 59 +++++++++++++- + lib/isc/netmgr/tlsdns.c | 120 ++++++++++++++++++++------- + lib/ns/client.c | 156 +++++++++++++++++------------------- + lib/ns/include/ns/client.h | 6 +- + 7 files changed, 379 insertions(+), 139 deletions(-) + +diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h +index 6aca9ab..bc1ba73 100644 +--- a/lib/isc/netmgr/netmgr-int.h ++++ b/lib/isc/netmgr/netmgr-int.h +@@ -62,9 +62,10 @@ + #endif + + /* +- * The TCP receive buffer can fit one maximum sized DNS message plus its size, +- * the receive buffer here affects TCP, DoT and DoH. ++ * The TCP send and receive buffers can fit one maximum sized DNS message plus ++ * its size, the receive buffer here affects TCP, DoT and DoH. 
+ */ ++#define ISC_NETMGR_TCP_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) + #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) + + /* Pick the larger buffer */ +@@ -377,9 +378,10 @@ struct isc__nm_uvreq { + int magic; + isc_nmsocket_t *sock; + isc_nmhandle_t *handle; +- char tcplen[2]; /* The TCP DNS message length */ +- uv_buf_t uvbuf; /* translated isc_region_t, to be +- * sent or received */ ++ char tcplen[2]; /* The TCP DNS message length */ ++ uv_buf_t uvbuf; /* translated isc_region_t, to be ++ * sent or received */ ++ isc_region_t userbuf; + isc_sockaddr_t local; /* local address */ + isc_sockaddr_t peer; /* peer address */ + isc__nm_cb_t cb; /* callback */ +@@ -998,7 +1000,6 @@ struct isc_nmsocket { + TLS_STATE_ERROR, + TLS_STATE_CLOSING + } state; +- isc_region_t senddata; + ISC_LIST(isc__nm_uvreq_t) sendreqs; + bool cycle; + isc_result_t pending_error; +@@ -1063,6 +1064,12 @@ struct isc_nmsocket { + */ + uint64_t write_timeout; + ++ /* ++ * Reading was throttled over TCP as the peer does not read the ++ * data we are sending back. ++ */ ++ bool reading_throttled; ++ + /*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */ + isc_nmsocket_t *outer; + +@@ -2265,6 +2272,14 @@ isc__nmsocket_readtimeout_cb(uv_timer_t *timer); + void + isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult); + ++/*%< ++ * ++ * Maximum number of simultaneous handles in flight supported for a single ++ * connected TCPDNS socket. This value was chosen arbitrarily, and may be ++ * changed in the future. ++ */ ++#define STREAM_CLIENTS_PER_CONN 23 ++ + #define UV_RUNTIME_CHECK(func, ret) \ + if (ret != 0) { \ + FATAL_ERROR("%s failed: %s\n", #func, uv_strerror(ret)); \ +diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c +index 2310b4b..f9e3b70 100644 +--- a/lib/isc/netmgr/netmgr.c ++++ b/lib/isc/netmgr/netmgr.c +@@ -49,8 +49,15 @@ + * How many isc_nmhandles and isc_nm_uvreqs will we be + * caching for reuse in a socket. 
+ */ +-#define ISC_NM_HANDLES_STACK_SIZE 600 +-#define ISC_NM_REQS_STACK_SIZE 600 ++#define ISC_NM_HANDLES_STACK_SIZE 16 ++#define ISC_NM_REQS_STACK_SIZE 16 ++ ++/*% ++ * Same, but for UDP sockets which tend to need larger values as they ++ * process many requests per socket. ++ */ ++#define ISC_NM_HANDLES_STACK_SIZE_UDP 64 ++#define ISC_NM_REQS_STACK_SIZE_UDP 64 + + /*% + * Shortcut index arrays to get access to statistics counters. +@@ -1506,16 +1513,25 @@ void + isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, + isc_sockaddr_t *iface FLARG) { + uint16_t family; ++ size_t inactive_handles_stack_size = ISC_NM_HANDLES_STACK_SIZE; ++ size_t inactive_reqs_stack_size = ISC_NM_REQS_STACK_SIZE; + + REQUIRE(sock != NULL); + REQUIRE(mgr != NULL); + +- *sock = (isc_nmsocket_t){ .type = type, +- .fd = -1, +- .inactivehandles = isc_astack_new( +- mgr->mctx, ISC_NM_HANDLES_STACK_SIZE), +- .inactivereqs = isc_astack_new( +- mgr->mctx, ISC_NM_REQS_STACK_SIZE) }; ++ if (type == isc_nm_udpsocket) { ++ inactive_handles_stack_size = ISC_NM_HANDLES_STACK_SIZE_UDP; ++ inactive_reqs_stack_size = ISC_NM_REQS_STACK_SIZE_UDP; ++ } ++ ++ *sock = (isc_nmsocket_t){ ++ .type = type, ++ .fd = -1, ++ .inactivehandles = isc_astack_new(mgr->mctx, ++ inactive_handles_stack_size), ++ .inactivereqs = isc_astack_new(mgr->mctx, ++ inactive_reqs_stack_size) ++ }; + + ISC_LIST_INIT(sock->tls.sendreqs); + +@@ -2084,6 +2100,7 @@ isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult) { + + sock = req->sock; + ++ isc__nm_start_reading(sock); + isc__nmsocket_reset(sock); + } + +@@ -2093,7 +2110,6 @@ isc__nmsocket_readtimeout_cb(uv_timer_t *timer) { + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); +- REQUIRE(atomic_load(&sock->reading)); + + if (atomic_load(&sock->client)) { + uv_timer_stop(timer); +@@ -2340,8 +2356,10 @@ processbuffer(isc_nmsocket_t *sock) { + * timers. If we do have a full message, reset the timer. 
+ * + * Stop reading if this is a client socket, or if the server socket +- * has been set to sequential mode. In this case we'll be called again +- * later by isc__nm_resume_processing(). ++ * has been set to sequential mode, or the number of queries we are ++ * processing simultaneously has reached the clients-per-connection ++ * limit. In this case we'll be called again later by ++ * isc__nm_resume_processing(). + */ + isc_result_t + isc__nm_process_sock_buffer(isc_nmsocket_t *sock) { +@@ -2349,14 +2367,41 @@ isc__nm_process_sock_buffer(isc_nmsocket_t *sock) { + int_fast32_t ah = atomic_load(&sock->ah); + isc_result_t result = processbuffer(sock); + switch (result) { +- case ISC_R_NOMORE: ++ case ISC_R_NOMORE: { + /* + * Don't reset the timer until we have a + * full DNS message. + */ +- result = isc__nm_start_reading(sock); +- if (result != ISC_R_SUCCESS) { +- return (result); ++ ++ /* ++ * Restart reading if we have less data in the send ++ * queue than the send buffer size, this means that the ++ * TCP client has started reading some data again. ++ * Starting reading when we go under the limit instead ++ * of waiting for all data has been flushed allows ++ * faster recovery (in case there was a congestion and ++ * now there isn't). 
++ */ ++ size_t write_queue_size = ++ uv_stream_get_write_queue_size( ++ &sock->uv_handle.stream); ++ if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) { ++ if (sock->reading_throttled) { ++ isc_log_write(isc_lctx, ++ ISC_LOGCATEGORY_GENERAL, ++ ISC_LOGMODULE_NETMGR, ++ ISC_LOG_DEBUG(3), ++ "resuming TCP " ++ "connection, the other " ++ "side is reading the " ++ "data again (%zu)", ++ write_queue_size); ++ sock->reading_throttled = false; ++ } ++ result = isc__nm_start_reading(sock); ++ if (result != ISC_R_SUCCESS) { ++ return (result); ++ } + } + /* + * Start the timer only if there are no externally used +@@ -2368,6 +2413,7 @@ isc__nm_process_sock_buffer(isc_nmsocket_t *sock) { + isc__nmsocket_timer_start(sock); + } + goto done; ++ } + case ISC_R_CANCELED: + isc__nmsocket_timer_stop(sock); + isc__nm_stop_reading(sock); +@@ -2381,7 +2427,8 @@ isc__nm_process_sock_buffer(isc_nmsocket_t *sock) { + isc__nmsocket_timer_stop(sock); + + if (atomic_load(&sock->client) || +- atomic_load(&sock->sequential)) ++ atomic_load(&sock->sequential) || ++ atomic_load(&sock->ah) >= STREAM_CLIENTS_PER_CONN) + { + isc__nm_stop_reading(sock); + goto done; +diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c +index 16b53cc..37d44bd 100644 +--- a/lib/isc/netmgr/tcp.c ++++ b/lib/isc/netmgr/tcp.c +@@ -766,7 +766,7 @@ isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpstartread_t *ievent = + (isc__netievent_tcpstartread_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; +- isc_result_t result; ++ isc_result_t result = ISC_R_SUCCESS; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); +@@ -774,7 +774,7 @@ isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0) { + + if (isc__nmsocket_closing(sock)) { + result = ISC_R_CANCELED; +- } else { ++ } else if (!sock->reading_throttled) { + result = isc__nm_start_reading(sock); + } + +@@ -905,6 +905,32 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, 
const uv_buf_t *buf) { + + /* The readcb could have paused the reading */ + if (atomic_load(&sock->reading)) { ++ if (!sock->client) { ++ /* ++ * Stop reading if we have accumulated enough bytes in ++ * the send queue; this means that the TCP client is not ++ * reading back the data we sending to it, and there's ++ * no reason to continue processing more incoming DNS ++ * messages, if the client is not reading back the ++ * responses. ++ */ ++ size_t write_queue_size = ++ uv_stream_get_write_queue_size( ++ &sock->uv_handle.stream); ++ ++ if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { ++ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ++ ISC_LOGMODULE_NETMGR, ++ ISC_LOG_DEBUG(3), ++ "throttling TCP connection, " ++ "the other side is " ++ "not reading the data (%zu)", ++ write_queue_size); ++ sock->reading_throttled = true; ++ isc__nm_stop_reading(sock); ++ } ++ } ++ + /* The timer will be updated */ + isc__nmsocket_timer_restart(sock); + } +@@ -1095,6 +1121,34 @@ isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, + return; + } + ++static void ++tcp_maybe_restart_reading(isc_nmsocket_t *sock) { ++ if (!sock->client && sock->reading_throttled && ++ !uv_is_active(&sock->uv_handle.handle)) ++ { ++ /* ++ * Restart reading if we have less data in the send queue than ++ * the send buffer size, this means that the TCP client has ++ * started reading some data again. Starting reading when we go ++ * under the limit instead of waiting for all data has been ++ * flushed allows faster recovery (in case there was a ++ * congestion and now there isn't). 
++ */ ++ size_t write_queue_size = ++ uv_stream_get_write_queue_size(&sock->uv_handle.stream); ++ if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) { ++ isc_log_write( ++ isc_lctx, ISC_LOGCATEGORY_GENERAL, ++ ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), ++ "resuming TCP connection, the other side " ++ "is reading the data again (%zu)", ++ write_queue_size); ++ sock->reading_throttled = false; ++ isc__nm_start_reading(sock); ++ } ++ } ++} ++ + static void + tcp_send_cb(uv_write_t *req, int status) { + isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; +@@ -1112,10 +1166,23 @@ tcp_send_cb(uv_write_t *req, int status) { + isc__nm_incstats(sock, STATID_SENDFAIL); + isc__nm_failed_send_cb(sock, uvreq, + isc__nm_uverr2result(status)); ++ ++ if (!sock->client && ++ (atomic_load(&sock->reading) || sock->reading_throttled)) ++ { ++ /* ++ * As we are resuming reading, it is not throttled ++ * anymore (technically). ++ */ ++ sock->reading_throttled = false; ++ isc__nm_start_reading(sock); ++ isc__nmsocket_reset(sock); ++ } + return; + } + + isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false); ++ tcp_maybe_restart_reading(sock); + } + + /* +diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c +index 46958d0..6d417f7 100644 +--- a/lib/isc/netmgr/tcpdns.c ++++ b/lib/isc/netmgr/tcpdns.c +@@ -733,7 +733,7 @@ isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_tcpdnsread_t *ievent = + (isc__netievent_tcpdnsread_t *)ev0; + isc_nmsocket_t *sock = ievent->sock; +- isc_result_t result; ++ isc_result_t result = ISC_R_SUCCESS; + + UNUSED(worker); + +@@ -742,7 +742,7 @@ isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) { + + if (isc__nmsocket_closing(sock)) { + result = ISC_R_CANCELED; +- } else { ++ } else if (!sock->reading_throttled) { + result = isc__nm_process_sock_buffer(sock); + } + +@@ -905,6 +905,28 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, + result = isc__nm_process_sock_buffer(sock); + 
if (result != ISC_R_SUCCESS) { + isc__nm_failed_read_cb(sock, result, true); ++ } else if (!sock->client) { ++ /* ++ * Stop reading if we have accumulated enough bytes in ++ * the send queue; this means that the TCP client is not ++ * reading back the data we sending to it, and there's ++ * no reason to continue processing more incoming DNS ++ * messages, if the client is not reading back the ++ * responses. ++ */ ++ size_t write_queue_size = ++ uv_stream_get_write_queue_size(&sock->uv_handle.stream); ++ ++ if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { ++ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ++ ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), ++ "throttling TCP connection, " ++ "the other side is " ++ "not reading the data (%zu)", ++ write_queue_size); ++ sock->reading_throttled = true; ++ isc__nm_stop_reading(sock); ++ } + } + free: + if (nread < 0) { +@@ -1125,6 +1147,19 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, + return; + } + ++static void ++tcpdns_maybe_restart_reading(isc_nmsocket_t *sock) { ++ if (!sock->client && sock->reading_throttled && ++ !uv_is_active(&sock->uv_handle.handle)) ++ { ++ isc_result_t result = isc__nm_process_sock_buffer(sock); ++ if (result != ISC_R_SUCCESS) { ++ atomic_store(&sock->reading, true); ++ isc__nm_failed_read_cb(sock, result, false); ++ } ++ } ++} ++ + static void + tcpdns_send_cb(uv_write_t *req, int status) { + isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; +@@ -1142,10 +1177,23 @@ tcpdns_send_cb(uv_write_t *req, int status) { + isc__nm_incstats(sock, STATID_SENDFAIL); + isc__nm_failed_send_cb(sock, uvreq, + isc__nm_uverr2result(status)); ++ ++ if (!sock->client && ++ (atomic_load(&sock->reading) || sock->reading_throttled)) ++ { ++ /* ++ * As we are resuming reading, it is not throttled ++ * anymore (technically). 
++ */ ++ sock->reading_throttled = false; ++ isc__nm_start_reading(sock); ++ isc__nmsocket_reset(sock); ++ } + return; + } + + isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false); ++ tcpdns_maybe_restart_reading(sock); + } + + /* +@@ -1211,6 +1259,13 @@ isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0) { + goto fail; + } + ++ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR, ++ ISC_LOG_DEBUG(3), ++ "throttling TCP connection, the other side is not " ++ "reading the data, switching to uv_write()"); ++ sock->reading_throttled = true; ++ isc__nm_stop_reading(sock); ++ + r = uv_write(&uvreq->uv_req.write, &sock->uv_handle.stream, bufs, nbufs, + tcpdns_send_cb); + if (r < 0) { +diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c +index 40e6fc8..f62dfd4 100644 +--- a/lib/isc/netmgr/tlsdns.c ++++ b/lib/isc/netmgr/tlsdns.c +@@ -88,6 +88,9 @@ tlsdns_set_tls_shutdown(isc_tls_t *tls) { + (void)SSL_set_shutdown(tls, SSL_SENT_SHUTDOWN); + } + ++static void ++tlsdns_maybe_restart_reading(isc_nmsocket_t *sock); ++ + static bool + peer_verification_has_failed(isc_nmsocket_t *sock) { + if (sock->tls.tls != NULL && sock->tls.state == TLS_STATE_HANDSHAKE && +@@ -1076,6 +1079,19 @@ tls_cycle_input(isc_nmsocket_t *sock) { + size_t len; + + for (;;) { ++ /* ++ * There is a similar branch in ++ * isc__nm_process_sock_buffer() which is sufficient to ++ * stop excessive processing in TCP. However, as we wrap ++ * this call in a loop, we need to have it here in order ++ * to limit the number of loop iterations (and, ++ * consequently, the number of messages processed). 
++ */ ++ if (atomic_load(&sock->ah) >= STREAM_CLIENTS_PER_CONN) { ++ isc__nm_stop_reading(sock); ++ break; ++ } ++ + (void)SSL_peek(sock->tls.tls, &(char){ '\0' }, 0); + + int pending = SSL_pending(sock->tls.tls); +@@ -1253,17 +1269,17 @@ call_pending_send_callbacks(isc_nmsocket_t *sock, const isc_result_t result) { + } + + static void +-free_senddata(isc_nmsocket_t *sock, const isc_result_t result) { ++free_senddata(isc_nmsocket_t *sock, isc__nm_uvreq_t *req, ++ const isc_result_t result) { + REQUIRE(VALID_NMSOCK(sock)); +- REQUIRE(sock->tls.senddata.base != NULL); +- REQUIRE(sock->tls.senddata.length > 0); ++ REQUIRE(req != NULL && req->userbuf.base != NULL && ++ req->userbuf.length > 0); + +- isc_mem_put(sock->mgr->mctx, sock->tls.senddata.base, +- sock->tls.senddata.length); +- sock->tls.senddata.base = NULL; +- sock->tls.senddata.length = 0; ++ isc_mem_put(sock->mgr->mctx, req->userbuf.base, req->userbuf.length); + + call_pending_send_callbacks(sock, result); ++ ++ isc__nm_uvreq_put(&req, sock); + } + + static void +@@ -1276,11 +1292,19 @@ tls_write_cb(uv_write_t *req, int status) { + isc_nm_timer_stop(uvreq->timer); + isc_nm_timer_detach(&uvreq->timer); + +- free_senddata(sock, result); +- +- isc__nm_uvreq_put(&uvreq, sock); ++ free_senddata(sock, uvreq, result); + + if (status != 0) { ++ if (!sock->client && ++ (atomic_load(&sock->reading) || sock->reading_throttled)) ++ { ++ /* ++ * As we are resuming reading, it is not throttled ++ * anymore (technically). 
++ */ ++ sock->reading_throttled = false; ++ isc__nm_start_reading(sock); ++ } + tls_error(sock, result); + return; + } +@@ -1290,6 +1314,8 @@ tls_write_cb(uv_write_t *req, int status) { + tls_error(sock, result); + return; + } ++ ++ tlsdns_maybe_restart_reading(sock); + } + + static isc_result_t +@@ -1303,23 +1329,18 @@ tls_cycle_output(isc_nmsocket_t *sock) { + int rv; + int r; + +- if (sock->tls.senddata.base != NULL || +- sock->tls.senddata.length > 0) +- { +- break; +- } +- + if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) { + pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE; + } + +- sock->tls.senddata.base = isc_mem_get(sock->mgr->mctx, pending); +- sock->tls.senddata.length = pending; +- + /* It's a bit misnomer here, but it does the right thing */ + req = isc__nm_get_read_req(sock, NULL); +- req->uvbuf.base = (char *)sock->tls.senddata.base; +- req->uvbuf.len = sock->tls.senddata.length; ++ ++ req->userbuf.base = isc_mem_get(sock->mgr->mctx, pending); ++ req->userbuf.length = (size_t)pending; ++ ++ req->uvbuf.base = (char *)req->userbuf.base; ++ req->uvbuf.len = (size_t)req->userbuf.length; + + rv = BIO_read_ex(sock->tls.app_rbio, req->uvbuf.base, + req->uvbuf.len, &bytes); +@@ -1331,32 +1352,36 @@ tls_cycle_output(isc_nmsocket_t *sock) { + + if (r == pending) { + /* Wrote everything, restart */ +- isc__nm_uvreq_put(&req, sock); +- free_senddata(sock, ISC_R_SUCCESS); ++ free_senddata(sock, req, ISC_R_SUCCESS); + continue; + } + + if (r > 0) { + /* Partial write, send rest asynchronously */ +- memmove(req->uvbuf.base, req->uvbuf.base + r, +- req->uvbuf.len - r); +- req->uvbuf.len = req->uvbuf.len - r; ++ req->uvbuf.base += r; ++ req->uvbuf.len -= r; + } else if (r == UV_ENOSYS || r == UV_EAGAIN) { + /* uv_try_write is not supported, send + * asynchronously */ + } else { + result = isc__nm_uverr2result(r); +- isc__nm_uvreq_put(&req, sock); +- free_senddata(sock, result); ++ free_senddata(sock, req, result); + break; + } + ++ isc_log_write( ++ isc_lctx, 
ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR, ++ ISC_LOG_DEBUG(3), ++ "throttling TCP connection, the other side is not " ++ "reading the data, switching to uv_write()"); ++ sock->reading_throttled = true; ++ isc__nm_stop_reading(sock); ++ + r = uv_write(&req->uv_req.write, &sock->uv_handle.stream, + &req->uvbuf, 1, tls_write_cb); + if (r < 0) { + result = isc__nm_uverr2result(r); +- isc__nm_uvreq_put(&req, sock); +- free_senddata(sock, result); ++ free_senddata(sock, req, result); + break; + } + +@@ -1525,6 +1550,28 @@ isc__nm_tlsdns_read_cb(uv_stream_t *stream, ssize_t nread, + result = tls_cycle(sock); + if (result != ISC_R_SUCCESS) { + isc__nm_failed_read_cb(sock, result, true); ++ } else if (!sock->client) { ++ /* ++ * Stop reading if we have accumulated enough bytes in ++ * the send queue; this means that the TCP client is not ++ * reading back the data we sending to it, and there's ++ * no reason to continue processing more incoming DNS ++ * messages, if the client is not reading back the ++ * responses. 
++ */ ++ size_t write_queue_size = ++ uv_stream_get_write_queue_size(&sock->uv_handle.stream); ++ ++ if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { ++ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ++ ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), ++ "throttling TCP connection, " ++ "the other side is " ++ "not reading the data (%zu)", ++ write_queue_size); ++ sock->reading_throttled = true; ++ isc__nm_stop_reading(sock); ++ } + } + free: + async_tlsdns_cycle(sock); +@@ -1766,6 +1813,19 @@ isc__nm_tlsdns_send(isc_nmhandle_t *handle, isc_region_t *region, + return; + } + ++static void ++tlsdns_maybe_restart_reading(isc_nmsocket_t *sock) { ++ if (!sock->client && sock->reading_throttled && ++ !uv_is_active(&sock->uv_handle.handle)) ++ { ++ isc_result_t result = isc__nm_process_sock_buffer(sock); ++ if (result != ISC_R_SUCCESS) { ++ atomic_store(&sock->reading, true); ++ isc__nm_failed_read_cb(sock, result, false); ++ } ++ } ++} ++ + /* + * Handle 'tcpsend' async event - send a packet on the socket + */ +diff --git a/lib/ns/client.c b/lib/ns/client.c +index a62343b..8981222 100644 +--- a/lib/ns/client.c ++++ b/lib/ns/client.c +@@ -101,6 +101,9 @@ + #define COOKIE_SIZE 24U /* 8 + 4 + 4 + 8 */ + #define ECS_SIZE 20U /* 2 + 1 + 1 + [0..16] */ + ++#define TCPBUFFERS_FILLCOUNT 1U ++#define TCPBUFFERS_FREEMAX 8U ++ + #define WANTNSID(x) (((x)->attributes & NS_CLIENTATTR_WANTNSID) != 0) + #define WANTEXPIRE(x) (((x)->attributes & NS_CLIENTATTR_WANTEXPIRE) != 0) + #define WANTPAD(x) (((x)->attributes & NS_CLIENTATTR_WANTPAD) != 0) +@@ -330,12 +333,36 @@ client_senddone(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "send failed: %s", + isc_result_totext(result)); ++ isc_nm_bad_request(handle); + } + } + + isc_nmhandle_detach(&handle); + } + ++static void ++client_setup_tcp_buffer(ns_client_t *client) { ++ REQUIRE(client->tcpbuf == NULL); ++ ++ client->tcpbuf = client->manager->tcp_buffer; ++ client->tcpbuf_size = 
NS_CLIENT_TCP_BUFFER_SIZE; ++} ++ ++static void ++client_put_tcp_buffer(ns_client_t *client) { ++ if (client->tcpbuf == NULL) { ++ return; ++ } ++ ++ if (client->tcpbuf != client->manager->tcp_buffer) { ++ isc_mem_put(client->manager->mctx, client->tcpbuf, ++ client->tcpbuf_size); ++ } ++ ++ client->tcpbuf = NULL; ++ client->tcpbuf_size = 0; ++} ++ + static void + client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer, + unsigned char **datap) { +@@ -345,12 +372,9 @@ client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer, + REQUIRE(datap != NULL); + + if (TCP_CLIENT(client)) { +- INSIST(client->tcpbuf == NULL); +- client->tcpbuf = isc_mem_get(client->manager->send_mctx, +- NS_CLIENT_TCP_BUFFER_SIZE); +- client->tcpbuf_size = NS_CLIENT_TCP_BUFFER_SIZE; ++ client_setup_tcp_buffer(client); + data = client->tcpbuf; +- isc_buffer_init(buffer, data, NS_CLIENT_TCP_BUFFER_SIZE); ++ isc_buffer_init(buffer, data, client->tcpbuf_size); + } else { + data = client->sendbuf; + if ((client->attributes & NS_CLIENTATTR_HAVECOOKIE) == 0) { +@@ -383,11 +407,49 @@ client_sendpkg(ns_client_t *client, isc_buffer_t *buffer) { + + if (isc_buffer_base(buffer) == client->tcpbuf) { + size_t used = isc_buffer_usedlength(buffer); +- client->tcpbuf = isc_mem_reget(client->manager->send_mctx, +- client->tcpbuf, +- client->tcpbuf_size, used); +- client->tcpbuf_size = used; +- r.base = client->tcpbuf; ++ INSIST(client->tcpbuf_size == NS_CLIENT_TCP_BUFFER_SIZE); ++ ++ /* ++ * Copy the data into a smaller buffer before sending, ++ * and keep the original big TCP send buffer for reuse ++ * by other clients. ++ */ ++ if (used > NS_CLIENT_SEND_BUFFER_SIZE) { ++ /* ++ * We can save space by allocating a new buffer with a ++ * correct size and freeing the big buffer. ++ */ ++ unsigned char *new_tcpbuf = ++ isc_mem_get(client->manager->mctx, used); ++ memmove(new_tcpbuf, buffer->base, used); ++ ++ /* ++ * Put the big buffer so we can replace the pointer ++ * and the size with the new ones. 
++ */ ++ client_put_tcp_buffer(client); ++ ++ /* ++ * Keep the new buffer's information so it can be freed. ++ */ ++ client->tcpbuf = new_tcpbuf; ++ client->tcpbuf_size = used; ++ ++ r.base = new_tcpbuf; ++ } else { ++ /* ++ * The data fits in the available space in ++ * 'sendbuf', there is no need for a new buffer. ++ */ ++ memmove(client->sendbuf, buffer->base, used); ++ ++ /* ++ * Put the big buffer, we don't need a dynamic buffer. ++ */ ++ client_put_tcp_buffer(client); ++ ++ r.base = client->sendbuf; ++ } + r.length = used; + } else { + isc_buffer_usedregion(buffer, &r); +@@ -461,8 +523,7 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) { + return; + done: + if (client->tcpbuf != NULL) { +- isc_mem_put(client->manager->send_mctx, client->tcpbuf, +- client->tcpbuf_size); ++ client_put_tcp_buffer(client); + } + + ns_client_drop(client, result); +@@ -746,8 +807,7 @@ renderend: + + cleanup: + if (client->tcpbuf != NULL) { +- isc_mem_put(client->manager->send_mctx, client->tcpbuf, +- client->tcpbuf_size); ++ client_put_tcp_buffer(client); + } + + if (cleanup_cctx) { +@@ -1629,8 +1689,7 @@ ns__client_reset_cb(void *client0) { + + ns_client_endrequest(client); + if (client->tcpbuf != NULL) { +- isc_mem_put(client->manager->send_mctx, client->tcpbuf, +- client->tcpbuf_size); ++ client_put_tcp_buffer(client); + } + + if (client->keytag != NULL) { +@@ -1661,8 +1720,6 @@ ns__client_put_cb(void *client0) { + client->magic = 0; + client->shuttingdown = true; + +- isc_mem_put(client->manager->send_mctx, client->sendbuf, +- NS_CLIENT_SEND_BUFFER_SIZE); + if (client->opt != NULL) { + INSIST(dns_rdataset_isassociated(client->opt)); + dns_rdataset_disassociate(client->opt); +@@ -2339,8 +2396,6 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) { + dns_message_create(client->mctx, DNS_MESSAGE_INTENTPARSE, + &client->message); + +- client->sendbuf = isc_mem_get(client->manager->send_mctx, +- NS_CLIENT_SEND_BUFFER_SIZE); + /* + * Set magic 
earlier than usual because ns_query_init() + * and the functions it calls will require it. +@@ -2357,7 +2412,6 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) { + ns_clientmgr_t *oldmgr = client->manager; + ns_server_t *sctx = client->sctx; + isc_task_t *task = client->task; +- unsigned char *sendbuf = client->sendbuf; + dns_message_t *message = client->message; + isc_mem_t *oldmctx = client->mctx; + ns_query_t query = client->query; +@@ -2372,7 +2426,6 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) { + .manager = oldmgr, + .sctx = sctx, + .task = task, +- .sendbuf = sendbuf, + .message = message, + .query = query, + .tid = tid }; +@@ -2397,8 +2450,6 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) { + return (ISC_R_SUCCESS); + + cleanup: +- isc_mem_put(client->manager->send_mctx, client->sendbuf, +- NS_CLIENT_SEND_BUFFER_SIZE); + dns_message_detach(&client->message); + isc_task_detach(&client->task); + ns_clientmgr_detach(&client->manager); +@@ -2461,8 +2512,6 @@ clientmgr_destroy(ns_clientmgr_t *manager) { + isc_task_detach(&manager->task); + ns_server_detach(&manager->sctx); + +- isc_mem_detach(&manager->send_mctx); +- + isc_mem_putanddetach(&manager->mctx, manager, sizeof(*manager)); + } + +@@ -2499,61 +2548,6 @@ ns_clientmgr_create(ns_server_t *sctx, isc_taskmgr_t *taskmgr, + + ISC_LIST_INIT(manager->recursing); + +- /* +- * We create specialised per-worker memory context specifically +- * dedicated and tuned for allocating send buffers as it is a very +- * common operation. Not doing so may result in excessive memory +- * use in certain workloads. 
+- * +- * Please see this thread for more details: +- * +- * https://github.com/jemalloc/jemalloc/issues/2483 +- * +- * In particular, this information from the jemalloc developers is +- * of the most interest: +- * +- * https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1639019699 +- * https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1698173849 +- * +- * In essence, we use the following memory management strategy: +- * +- * 1. We use a per-worker memory arena for send buffers memory +- * allocation to reduce lock contention (In reality, we create a +- * per-client manager arena, but we have one client manager per +- * worker). +- * +- * 2. The automatically created arenas settings remain unchanged +- * and may be controlled by users (e.g. by setting the +- * "MALLOC_CONF" variable). +- * +- * 3. We attune the arenas to not use dirty pages cache as the +- * cache would have a poor reuse rate, and that is known to +- * significantly contribute to excessive memory use. +- * +- * 4. There is no strict need for the dirty cache, as there is a +- * per arena bin for each allocation size, so because we initially +- * allocate strictly 64K per send buffer (enough for a DNS +- * message), allocations would get directed to one bin (an "object +- * pool" or a "slab") maintained within an arena. That is, there +- * is an object pool already, specifically to optimise for the +- * case of frequent allocations of objects of the given size. The +- * object pool should suffice our needs, as we will end up +- * recycling the objects from there without the need to back it by +- * an additional layer of dirty pages cache. The dirty pages cache +- * would have worked better in the case when there are more +- * allocation bins involved due to a higher reuse rate (the case +- * of a more "generic" memory management). 
+- */ +- isc_mem_create_arena(&manager->send_mctx); +- isc_mem_setname(manager->send_mctx, "sendbufs"); +- (void)isc_mem_arena_set_dirty_decay_ms(manager->send_mctx, 0); +- /* +- * Disable muzzy pages cache too, as versions < 5.2.0 have it +- * enabled by default. The muzzy pages cache goes right below the +- * dirty pages cache and backs it. +- */ +- (void)isc_mem_arena_set_muzzy_decay_ms(manager->send_mctx, 0); +- + manager->magic = MANAGER_MAGIC; + + MTRACE("create"); +diff --git a/lib/ns/include/ns/client.h b/lib/ns/include/ns/client.h +index 7a7196f..ea2d83e 100644 +--- a/lib/ns/include/ns/client.h ++++ b/lib/ns/include/ns/client.h +@@ -144,7 +144,6 @@ struct ns_clientmgr { + unsigned int magic; + + isc_mem_t *mctx; +- isc_mem_t *send_mctx; + ns_server_t *sctx; + isc_taskmgr_t *taskmgr; + isc_timermgr_t *timermgr; +@@ -159,6 +158,8 @@ struct ns_clientmgr { + /* Lock covers the recursing list */ + isc_mutex_t reclock; + client_list_t recursing; /*%< Recursing clients */ ++ ++ uint8_t tcp_buffer[NS_CLIENT_TCP_BUFFER_SIZE]; + }; + + /*% nameserver client structure */ +@@ -187,7 +188,6 @@ struct ns_client { + unsigned char *tcpbuf; + size_t tcpbuf_size; + dns_message_t *message; +- unsigned char *sendbuf; + dns_rdataset_t *opt; + dns_ednsopt_t *ede; + uint16_t udpsize; +@@ -240,6 +240,8 @@ struct ns_client { + * bits will be used as the rcode in the response message. 
+ */ + int32_t rcode_override; ++ ++ uint8_t sendbuf[NS_CLIENT_SEND_BUFFER_SIZE]; + }; + + #define NS_CLIENT_MAGIC ISC_MAGIC('N', 'S', 'C', 'c') +-- +2.33.0 + diff --git a/backport-CVE-2024-11187.patch b/backport-CVE-2024-11187.patch new file mode 100644 index 0000000000000000000000000000000000000000..cfffa33230c3d5e80cdaefddae4fe7e06ef3812c --- /dev/null +++ b/backport-CVE-2024-11187.patch @@ -0,0 +1,257 @@ +From fa7b7973e36056440dd688c7f312c89600d4f8cf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= +Date: Thu, 14 Nov 2024 10:37:29 +0100 +Subject: [PATCH] Limit the additional processing for large RDATA sets + +When answering queries, don't add data to the additional section if +the answer has more than 13 names in the RDATA. This limits the +number of lookups into the database(s) during a single client query, +reducing query processing load. + +Also, don't append any additional data to type=ANY queries. The +answer to ANY is already big enough. + +(cherry picked from commit a1982cf1bb95c818aa7b58988b5611dec80f2408) + +Conflict:Context adaptation +Reference:https://downloads.isc.org/isc/bind9/9.18.33/patches/0002-CVE-2024-11187.patch + +--- + bin/tests/system/additional/tests.sh | 2 +- + bin/tests/system/resolver/tests.sh | 8 ++++++++ + lib/dns/include/dns/rdataset.h | 10 +++++++++- + lib/dns/rbtdb.c | 2 +- + lib/dns/rdataset.c | 7 ++++++- + lib/dns/resolver.c | 19 ++++++++++++------- + lib/ns/query.c | 12 ++++++++---- + 7 files changed, 45 insertions(+), 15 deletions(-) + +diff --git a/bin/tests/system/additional/tests.sh b/bin/tests/system/additional/tests.sh +index 193c9f9..e1b0cfb 100644 +--- a/bin/tests/system/additional/tests.sh ++++ b/bin/tests/system/additional/tests.sh +@@ -279,7 +279,7 @@ n=$((n + 1)) + echo_i "testing with 'minimal-any no;' ($n)" + ret=0 + $DIG $DIGOPTS -t ANY www.rt.example @10.53.0.1 >dig.out.$n || ret=1 +-grep "ANSWER: 3, AUTHORITY: 2, ADDITIONAL: 2" dig.out.$n >/dev/null || ret=1 ++grep "ANSWER: 3, AUTHORITY: 2, 
ADDITIONAL: 1" dig.out.$n >/dev/null || ret=1 + if [ $ret -eq 1 ]; then + echo_i "failed" + status=$((status + 1)) +diff --git a/bin/tests/system/resolver/tests.sh b/bin/tests/system/resolver/tests.sh +index 1ec5f86..e1a5bbd 100755 +--- a/bin/tests/system/resolver/tests.sh ++++ b/bin/tests/system/resolver/tests.sh +@@ -311,6 +311,10 @@ done + if [ $ret != 0 ]; then echo_i "failed"; fi + status=$((status + ret)) + ++stop_server ns4 ++touch ns4/named.noaa ++start_server --noclean --restart --port ${PORT} ns4 || ret=1 ++ + n=$((n + 1)) + echo_i "RT21594 regression test check setup ($n)" + ret=0 +@@ -347,6 +351,10 @@ grep "status: NXDOMAIN" dig.ns5.out.${n} >/dev/null || ret=1 + if [ $ret != 0 ]; then echo_i "failed"; fi + status=$((status + ret)) + ++stop_server ns4 ++rm ns4/named.noaa ++start_server --noclean --restart --port ${PORT} ns4 || ret=1 ++ + n=$((n + 1)) + echo_i "check that replacement of additional data by a negative cache no data entry clears the additional RRSIGs ($n)" + ret=0 +diff --git a/lib/dns/include/dns/rdataset.h b/lib/dns/include/dns/rdataset.h +index 566ea44..3294f63 100644 +--- a/lib/dns/include/dns/rdataset.h ++++ b/lib/dns/include/dns/rdataset.h +@@ -54,6 +54,8 @@ + #include + #include + ++#define DNS_RDATASET_MAXADDITIONAL 13 ++ + ISC_LANG_BEGINDECLS + + typedef enum { +@@ -454,7 +456,8 @@ dns_rdataset_towirepartial(dns_rdataset_t *rdataset, + isc_result_t + dns_rdataset_additionaldata(dns_rdataset_t *rdataset, + const dns_name_t *owner_name, +- dns_additionaldatafunc_t add, void *arg); ++ dns_additionaldatafunc_t add, void *arg, ++ size_t limit); + /*%< + * For each rdata in rdataset, call 'add' for each name and type in the + * rdata which is subject to additional section processing. +@@ -473,10 +476,15 @@ dns_rdataset_additionaldata(dns_rdataset_t *rdataset, + *\li If a call to dns_rdata_additionaldata() is not successful, the + * result returned will be the result of dns_rdataset_additionaldata(). 
+ * ++ *\li If 'limit' is non-zero and the number of the rdatasets is larger ++ * than 'limit', no additional data will be processed. ++ * + * Returns: + * + *\li #ISC_R_SUCCESS + * ++ *\li #DNS_R_TOOMANYRECORDS in case rdataset count is larger than 'limit' ++ * + *\li Any error that dns_rdata_additionaldata() can return. + */ + +diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c +index c22e021..2d32571 100644 +--- a/lib/dns/rbtdb.c ++++ b/lib/dns/rbtdb.c +@@ -10188,7 +10188,7 @@ no_glue: + idx = hash_32(hash, rbtversion->glue_table_bits); + + (void)dns_rdataset_additionaldata(rdataset, dns_rootname, +- glue_nsdname_cb, &ctx); ++ glue_nsdname_cb, &ctx, 0); + + cur = isc_mem_get(rbtdb->common.mctx, sizeof(*cur)); + +diff --git a/lib/dns/rdataset.c b/lib/dns/rdataset.c +index 4d48203..0b450a9 100644 +--- a/lib/dns/rdataset.c ++++ b/lib/dns/rdataset.c +@@ -577,7 +577,8 @@ dns_rdataset_towire(dns_rdataset_t *rdataset, const dns_name_t *owner_name, + isc_result_t + dns_rdataset_additionaldata(dns_rdataset_t *rdataset, + const dns_name_t *owner_name, +- dns_additionaldatafunc_t add, void *arg) { ++ dns_additionaldatafunc_t add, void *arg, ++ size_t limit) { + dns_rdata_t rdata = DNS_RDATA_INIT; + isc_result_t result; + +@@ -589,6 +590,10 @@ dns_rdataset_additionaldata(dns_rdataset_t *rdataset, + REQUIRE(DNS_RDATASET_VALID(rdataset)); + REQUIRE((rdataset->attributes & DNS_RDATASETATTR_QUESTION) == 0); + ++ if (limit != 0 && dns_rdataset_count(rdataset) > limit) { ++ return DNS_R_TOOMANYRECORDS; ++ } ++ + result = dns_rdataset_first(rdataset); + if (result != ISC_R_SUCCESS) { + return (result); +diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c +index 60cac29..e879ec8 100644 +--- a/lib/dns/resolver.c ++++ b/lib/dns/resolver.c +@@ -8844,7 +8844,7 @@ rctx_answer_any(respctx_t *rctx) { + rdataset->trust = rctx->trust; + + (void)dns_rdataset_additionaldata(rdataset, rctx->aname, +- check_related, rctx); ++ check_related, rctx, 0); + } + + return (ISC_R_SUCCESS); +@@ -8892,7 
+8892,7 @@ rctx_answer_match(respctx_t *rctx) { + rctx->ardataset->attributes |= DNS_RDATASETATTR_CACHE; + rctx->ardataset->trust = rctx->trust; + (void)dns_rdataset_additionaldata(rctx->ardataset, rctx->aname, +- check_related, rctx); ++ check_related, rctx, 0); + + for (sigrdataset = ISC_LIST_HEAD(rctx->aname->list); + sigrdataset != NULL; +@@ -9099,7 +9099,7 @@ rctx_authority_positive(respctx_t *rctx) { + */ + (void)dns_rdataset_additionaldata( + rdataset, name, check_related, +- rctx); ++ rctx, 0); + done = true; + } + } +@@ -9606,8 +9606,12 @@ rctx_referral(respctx_t *rctx) { + */ + INSIST(rctx->ns_rdataset != NULL); + FCTX_ATTR_SET(fctx, FCTX_ATTR_GLUING); ++ ++ /* ++ * Mark the glue records in the additional section to be cached. ++ */ + (void)dns_rdataset_additionaldata(rctx->ns_rdataset, rctx->ns_name, +- check_related, rctx); ++ check_related, rctx, 0); + #if CHECK_FOR_GLUE_IN_ANSWER + /* + * Look in the answer section for "glue" that is incorrectly +@@ -9619,8 +9623,9 @@ rctx_referral(respctx_t *rctx) { + if (rctx->glue_in_answer && + (fctx->type == dns_rdatatype_aaaa || fctx->type == dns_rdatatype_a)) + { +- (void)dns_rdataset_additionaldata( +- rctx->ns_rdataset, rctx->ns_name, check_answer, fctx); ++ (void)dns_rdataset_additionaldata(rctx->ns_rdataset, ++ rctx->ns_name, check_answer, ++ fctx, 0); + } + #endif /* if CHECK_FOR_GLUE_IN_ANSWER */ + FCTX_ATTR_CLR(fctx, FCTX_ATTR_GLUING); +@@ -9722,7 +9727,7 @@ again: + if (CHASE(rdataset)) { + rdataset->attributes &= ~DNS_RDATASETATTR_CHASE; + (void)dns_rdataset_additionaldata( +- rdataset, name, check_related, rctx); ++ rdataset, name, check_related, rctx, 0); + rescan = true; + } + } +diff --git a/lib/ns/query.c b/lib/ns/query.c +index 7884514..516396c 100644 +--- a/lib/ns/query.c ++++ b/lib/ns/query.c +@@ -2098,7 +2098,8 @@ addname: + if (trdataset != NULL && dns_rdatatype_followadditional(type)) { + if (client->additionaldepth++ < MAX_RESTARTS) { + eresult = dns_rdataset_additionaldata( +- trdataset, 
fname, query_additional_cb, qctx); ++ trdataset, fname, query_additional_cb, qctx, ++ DNS_RDATASET_MAXADDITIONAL); + } + client->additionaldepth--; + } +@@ -2198,7 +2199,7 @@ regular: + * We don't care if dns_rdataset_additionaldata() fails. + */ + (void)dns_rdataset_additionaldata(rdataset, name, query_additional_cb, +- qctx); ++ qctx, DNS_RDATASET_MAXADDITIONAL); + CTRACE(ISC_LOG_DEBUG(3), "query_additional: done"); + } + +@@ -2224,7 +2225,8 @@ query_addrrset(query_ctx_t *qctx, dns_name_t **namep, + * To the current response for 'client', add the answer RRset + * '*rdatasetp' and an optional signature set '*sigrdatasetp', with + * owner name '*namep', to section 'section', unless they are +- * already there. Also add any pertinent additional data. ++ * already there. Also add any pertinent additional data, unless ++ * the query was for type ANY. + * + * If 'dbuf' is not NULL, then '*namep' is the name whose data is + * stored in 'dbuf'. In this case, query_addrrset() guarantees that +@@ -2279,7 +2281,9 @@ query_addrrset(query_ctx_t *qctx, dns_name_t **namep, + */ + query_addtoname(mname, rdataset); + query_setorder(qctx, mname, rdataset); +- query_additional(qctx, mname, rdataset); ++ if (qctx->qtype != dns_rdatatype_any) { ++ query_additional(qctx, mname, rdataset); ++ } + + /* + * Note: we only add SIGs if we've added the type they cover, so +-- +2.33.0 + diff --git a/backport-CVE-2024-12705.patch b/backport-CVE-2024-12705.patch new file mode 100644 index 0000000000000000000000000000000000000000..ab145e2db5c205d093e2d81e2c45d5e0976c1609 --- /dev/null +++ b/backport-CVE-2024-12705.patch @@ -0,0 +1,1319 @@ +From 11a2956dce6f983d2bfcb532f5719791845b06ab Mon Sep 17 00:00:00 2001 +From: Artem Boldariev +Date: Thu, 4 Jul 2024 14:58:10 +0300 +Subject: [PATCH] DoH: process data chunk by chunk instead of all at once + +Initially, our DNS-over-HTTP(S) implementation would try to process as +much incoming data from the network as possible. 
However, that might +be undesirable as we might create too many streams (each effectively +backed by a ns_client_t object). That is too forgiving as it might +overwhelm the server and thrash its memory allocator, causing high CPU +and memory usage. + +Instead of doing that, we resort to processing incoming data using a +chunk-by-chunk processing strategy. That is, we split data into small +chunks (currently 256 bytes) and process each of them +asynchronously. However, we can process more than one chunk at +once (up to 4 currently), given that the number of HTTP/2 streams has +not increased while processing a chunk. + +That alone is not enough, though. In addition to the above, we should +limit the number of active streams: those streams for which we have +received a request and started processing it (the ones for which a +read callback was called), as it is perfectly fine to have more opened +streams than active ones. In the case we have reached or surpassed the +limit of active streams, we stop reading AND processing the data from +the remote peer. The number of active streams is effectively decreased +only when responses associated with the active streams are sent to the +remote peer. + +Overall, this strategy is very similar to the one used for other +stream-based DNS transports like TCP and TLS. 
+ +(cherry picked from commit 9846f395ad79bb50a5fa5ca6ab97ef904b3be35a) + +Conflict:NA +Reference:https://downloads.isc.org/isc/bind9/9.18.33/patches/0002-CVE-2024-12705.patch + +--- + lib/isc/netmgr/http.c | 451 +++++++++++++++++++++++++++++++++--- + lib/isc/netmgr/netmgr-int.h | 81 ++++++- + lib/isc/netmgr/netmgr.c | 78 +++++++ + lib/isc/netmgr/tcp.c | 26 ++- + lib/isc/netmgr/tlsstream.c | 137 +++++++++-- + 5 files changed, 723 insertions(+), 50 deletions(-) + +diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c +index 2220edf..6aaa6da 100644 +--- a/lib/isc/netmgr/http.c ++++ b/lib/isc/netmgr/http.c +@@ -85,6 +85,37 @@ + + #define INITIAL_DNS_MESSAGE_BUFFER_SIZE (512) + ++/* ++ * The value should be small enough to not allow a server to open too ++ * many streams at once. It should not be too small either because ++ * the incoming data will be split into too many chunks with each of ++ * them processed asynchronously. ++ */ ++#define INCOMING_DATA_CHUNK_SIZE (256) ++ ++/* ++ * Often processing a chunk does not change the number of streams. In ++ * that case we can process more than once, but we still should have a ++ * hard limit on that. ++ */ ++#define INCOMING_DATA_MAX_CHUNKS_AT_ONCE (4) ++ ++/* ++ * These constants define the grace period to help detect flooding clients. ++ * ++ * The first one defines how much data can be processed before opening ++ * a first stream and received at least some useful (=DNS) data. ++ * ++ * The second one defines how much data from a client we read before ++ * trying to drop a clients who sends not enough useful data. ++ * ++ * The third constant defines how many streams we agree to process ++ * before checking if there was at least one DNS request received. 
++ */ ++#define INCOMING_DATA_INITIAL_STREAM_SIZE (1536) ++#define INCOMING_DATA_GRACE_SIZE (MAX_ALLOWED_DATA_IN_HEADERS) ++#define MAX_STREAMS_BEFORE_FIRST_REQUEST (50) ++ + typedef struct isc_nm_http_response_status { + size_t code; + size_t content_length; +@@ -143,6 +174,7 @@ struct isc_nm_http_session { + ISC_LIST(http_cstream_t) cstreams; + ISC_LIST(isc_nmsocket_h2_t) sstreams; + size_t nsstreams; ++ uint64_t total_opened_sstreams; + + isc_nmhandle_t *handle; + isc_nmhandle_t *client_httphandle; +@@ -155,6 +187,18 @@ struct isc_nm_http_session { + + isc__nm_http_pending_callbacks_t pending_write_callbacks; + isc_buffer_t *pending_write_data; ++ ++ /* ++ * The statistical values below are for usage on server-side ++ * only. They are meant to detect clients that are taking too many ++ * resources from the server. ++ */ ++ uint64_t received; /* How many requests have been received. */ ++ uint64_t submitted; /* How many responses were submitted to send */ ++ uint64_t processed; /* How many responses were processed. 
*/ ++ ++ uint64_t processed_incoming_data; ++ uint64_t processed_useful_data; /* DNS data */ + }; + + typedef enum isc_http_error_responses { +@@ -177,6 +221,7 @@ typedef struct isc_http_send_req { + void *cbarg; + isc_buffer_t *pending_write_data; + isc__nm_http_pending_callbacks_t pending_write_callbacks; ++ uint64_t submitted; + } isc_http_send_req_t; + + #define HTTP_ENDPOINTS_MAGIC ISC_MAGIC('H', 'T', 'E', 'P') +@@ -186,10 +231,26 @@ static bool + http_send_outgoing(isc_nm_http_session_t *session, isc_nmhandle_t *httphandle, + isc_nm_cb_t cb, void *cbarg); + ++static void ++http_log_flooding_peer(isc_nm_http_session_t *session); ++ ++static bool ++http_is_flooding_peer(isc_nm_http_session_t *session); ++ ++static ssize_t ++http_process_input_data(isc_nm_http_session_t *session, ++ isc_buffer_t *input_data); ++ ++static inline bool ++http_too_many_active_streams(isc_nm_http_session_t *session); ++ + static void + http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, + isc_nm_cb_t send_cb, void *send_cbarg); + ++static void ++http_do_bio_async(isc_nm_http_session_t *session); ++ + static void + failed_httpstream_read_cb(isc_nmsocket_t *sock, isc_result_t result, + isc_nm_http_session_t *session); +@@ -491,6 +552,16 @@ finish_http_session(isc_nm_http_session_t *session) { + if (!session->closed) { + session->closed = true; + isc_nm_cancelread(session->handle); ++ isc__nmsocket_timer_stop(session->handle->sock); ++ } ++ ++ /* ++ * Free any unprocessed incoming data in order to not process ++ * it during indirect calls to http_do_bio() that might happen ++ * when calling the failed callbacks. 
++ */ ++ if (session->buf != NULL) { ++ isc_buffer_free(&session->buf); + } + + if (session->client) { +@@ -564,6 +635,7 @@ on_server_data_chunk_recv_callback(int32_t stream_id, const uint8_t *data, + if (new_bufsize <= MAX_DNS_MESSAGE_SIZE && + new_bufsize <= h2->content_length) + { ++ session->processed_useful_data += len; + isc_buffer_putmem(&h2->rbuf, data, len); + break; + } +@@ -612,6 +684,9 @@ call_unlink_cstream_readcb(http_cstream_t *cstream, + isc_buffer_usedregion(cstream->rbuf, &read_data); + cstream->read_cb(session->client_httphandle, result, &read_data, + cstream->read_cbarg); ++ if (result == ISC_R_SUCCESS) { ++ isc__nmsocket_timer_restart(session->handle->sock); ++ } + put_http_cstream(session->mctx, cstream); + } + +@@ -653,6 +728,9 @@ on_server_stream_close_callback(int32_t stream_id, + + ISC_LIST_UNLINK(session->sstreams, &sock->h2, link); + session->nsstreams--; ++ if (sock->h2.request_received) { ++ session->submitted++; ++ } + + /* + * By making a call to isc__nmsocket_prep_destroy(), we ensure that +@@ -969,6 +1047,182 @@ client_submit_request(isc_nm_http_session_t *session, http_cstream_t *stream) { + return (ISC_R_SUCCESS); + } + ++static ssize_t ++http_process_input_data(isc_nm_http_session_t *session, ++ isc_buffer_t *input_data) { ++ ssize_t readlen = 0; ++ ssize_t processed = 0; ++ isc_region_t chunk = { 0 }; ++ size_t before, after; ++ size_t i; ++ ++ REQUIRE(VALID_HTTP2_SESSION(session)); ++ REQUIRE(input_data != NULL); ++ ++ if (!http_session_active(session)) { ++ return 0; ++ } ++ ++ /* ++ * For clients that initiate request themselves just process ++ * everything. 
++ */ ++ if (session->client) { ++ isc_buffer_remainingregion(input_data, &chunk); ++ if (chunk.length == 0) { ++ return 0; ++ } ++ ++ readlen = nghttp2_session_mem_recv(session->ngsession, ++ chunk.base, chunk.length); ++ ++ if (readlen >= 0) { ++ isc_buffer_forward(input_data, readlen); ++ session->processed_incoming_data += readlen; ++ } ++ ++ return readlen; ++ } ++ ++ /* ++ * If no streams are created during processing, we might process ++ * more than one chunk at a time. Still we should not overdo that ++ * to avoid processing too much data at once as such behaviour is ++ * known for trashing the memory allocator at times. ++ */ ++ for (before = after = session->nsstreams, i = 0; ++ after <= before && i < INCOMING_DATA_MAX_CHUNKS_AT_ONCE; ++ after = session->nsstreams, i++) ++ { ++ const uint64_t active_streams = ++ (session->received - session->processed); ++ ++ /* ++ * If there are non completed send requests in flight -let's ++ * not process any incoming data, as it could lead to piling ++ * up too much send data in send buffers. With many clients ++ * connected it can lead to excessive memory consumption on ++ * the server instance. ++ */ ++ if (session->sending > 0) { ++ break; ++ } ++ ++ /* ++ * If we have reached the maximum number of streams used, we ++ * might stop processing for now, as nghttp2 will happily ++ * consume as much data as possible. 
++ */ ++ if (session->nsstreams >= session->max_concurrent_streams && ++ active_streams > 0) ++ { ++ break; ++ } ++ ++ if (http_too_many_active_streams(session)) { ++ break; ++ } ++ ++ isc_buffer_remainingregion(input_data, &chunk); ++ if (chunk.length == 0) { ++ break; ++ } ++ ++ chunk.length = ISC_MIN(chunk.length, INCOMING_DATA_CHUNK_SIZE); ++ ++ readlen = nghttp2_session_mem_recv(session->ngsession, ++ chunk.base, chunk.length); ++ ++ if (readlen >= 0) { ++ isc_buffer_forward(input_data, readlen); ++ session->processed_incoming_data += readlen; ++ processed += readlen; ++ } else { ++ isc_buffer_clear(input_data); ++ return readlen; ++ } ++ } ++ ++ return processed; ++} ++ ++static void ++http_log_flooding_peer(isc_nm_http_session_t *session) { ++ const int log_level = ISC_LOG_DEBUG(1); ++ if (session->handle != NULL && isc_log_wouldlog(isc_lctx, log_level)) { ++ char client_sabuf[ISC_SOCKADDR_FORMATSIZE]; ++ char local_sabuf[ISC_SOCKADDR_FORMATSIZE]; ++ ++ isc_sockaddr_format(&session->handle->sock->peer, client_sabuf, ++ sizeof(client_sabuf)); ++ isc_sockaddr_format(&session->handle->sock->iface, local_sabuf, ++ sizeof(local_sabuf)); ++ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ++ ISC_LOGMODULE_NETMGR, log_level, ++ "Dropping a flooding HTTP/2 peer " ++ "%s (on %s) - processed: %" PRIu64 ++ " bytes, of them useful: %" PRIu64 "", ++ client_sabuf, local_sabuf, ++ session->processed_incoming_data, ++ session->processed_useful_data); ++ } ++} ++ ++static bool ++http_is_flooding_peer(isc_nm_http_session_t *session) { ++ if (session->client) { ++ return false; ++ } ++ ++ /* ++ * A flooding client can try to open a lot of streams before ++ * submitting a request. Let's drop such clients. ++ */ ++ if (session->received == 0 && ++ session->total_opened_sstreams > MAX_STREAMS_BEFORE_FIRST_REQUEST) ++ { ++ return true; ++ } ++ ++ /* ++ * We have processed enough data to open at least one stream and ++ * get some useful data. 
++ */ ++ if (session->processed_incoming_data > ++ INCOMING_DATA_INITIAL_STREAM_SIZE && ++ (session->total_opened_sstreams == 0 || ++ session->processed_useful_data == 0)) ++ { ++ return true; ++ } ++ ++ if (session->processed_incoming_data < INCOMING_DATA_GRACE_SIZE) { ++ return false; ++ } ++ ++ /* ++ * The overhead of DoH per DNS message can be minimum 160-180 ++ * bytes. We should allow more for extra information that can be ++ * included in headers, so let's use 256 bytes. Minimum DNS ++ * message size is 12 bytes. So, (256+12)/12=22. Even that can be ++ * too restricting for some edge cases, but should be good enough ++ * for any practical purposes. Not to mention that HTTP/2 may ++ * include legitimate data that is completely useless for DNS ++ * purposes... ++ * ++ * Anyway, at that point we should have processed enough requests ++ * for such clients (if any). ++ */ ++ if (session->processed_useful_data == 0 || ++ (session->processed_incoming_data / ++ session->processed_useful_data) > 22) ++ { ++ return true; ++ } ++ ++ return false; ++} ++ + /* + * Read callback from TLS socket. 
+ */ +@@ -977,6 +1231,7 @@ http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, + void *data) { + isc_nm_http_session_t *session = (isc_nm_http_session_t *)data; + ssize_t readlen; ++ isc_buffer_t input; + + REQUIRE(VALID_HTTP2_SESSION(session)); + +@@ -990,11 +1245,17 @@ http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, + return; + } + +- readlen = nghttp2_session_mem_recv(session->ngsession, region->base, +- region->length); ++ isc_buffer_init(&input, region->base, region->length); ++ isc_buffer_add(&input, region->length); ++ ++ readlen = http_process_input_data(session, &input); + if (readlen < 0) { + failed_read_cb(ISC_R_UNEXPECTED, session); + return; ++ } else if (http_is_flooding_peer(session)) { ++ http_log_flooding_peer(session); ++ failed_read_cb(ISC_R_RANGE, session); ++ return; + } + + if ((size_t)readlen < region->length) { +@@ -1007,10 +1268,11 @@ http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, + isc_buffer_putmem(session->buf, region->base + readlen, + unread_size); + isc_nm_pauseread(session->handle); ++ http_do_bio_async(session); ++ } else { ++ /* We might have something to receive or send, do IO */ ++ http_do_bio(session, NULL, NULL, NULL); + } +- +- /* We might have something to receive or send, do IO */ +- http_do_bio(session, NULL, NULL, NULL); + } + + static void +@@ -1046,14 +1308,18 @@ http_writecb(isc_nmhandle_t *handle, isc_result_t result, void *arg) { + } + + isc_buffer_free(&req->pending_write_data); ++ session->processed += req->submitted; + isc_mem_put(session->mctx, req, sizeof(*req)); + + session->sending--; +- http_do_bio(session, NULL, NULL, NULL); +- isc_nmhandle_detach(&transphandle); +- if (result != ISC_R_SUCCESS && session->sending == 0) { ++ ++ if (result == ISC_R_SUCCESS) { ++ http_do_bio(session, NULL, NULL, NULL); ++ } else { + finish_http_session(session); + } ++ isc_nmhandle_detach(&transphandle); ++ + 
isc__nm_httpsession_detach(&session); + } + +@@ -1199,7 +1465,9 @@ http_send_outgoing(isc_nm_http_session_t *session, isc_nmhandle_t *httphandle, + *send = (isc_http_send_req_t){ .pending_write_data = + session->pending_write_data, + .cb = cb, +- .cbarg = cbarg }; ++ .cbarg = cbarg, ++ .submitted = session->submitted }; ++ session->submitted = 0; + session->pending_write_data = NULL; + move_pending_send_callbacks(session, send); + +@@ -1220,6 +1488,27 @@ nothing_to_send: + return (false); + } + ++static inline bool ++http_too_many_active_streams(isc_nm_http_session_t *session) { ++ const uint64_t active_streams = session->received - session->processed; ++ const uint64_t max_active_streams = ISC_MIN( ++ STREAM_CLIENTS_PER_CONN, session->max_concurrent_streams); ++ ++ if (session->client) { ++ return false; ++ } ++ ++ /* ++ * Do not process incoming data if there are too many active DNS ++ * clients (streams) per connection. ++ */ ++ if (active_streams >= max_active_streams) { ++ return true; ++ } ++ ++ return false; ++} ++ + static void + http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, + isc_nm_cb_t send_cb, void *send_cbarg) { +@@ -1235,59 +1524,140 @@ http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, + finish_http_session(session); + } + return; +- } else if (nghttp2_session_want_read(session->ngsession) == 0 && +- nghttp2_session_want_write(session->ngsession) == 0 && +- session->pending_write_data == NULL) +- { +- session->closing = true; ++ } ++ ++ if (send_cb != NULL) { ++ INSIST(VALID_NMHANDLE(send_httphandle)); ++ (void)http_send_outgoing(session, send_httphandle, send_cb, ++ send_cbarg); ++ return; ++ } ++ ++ INSIST(send_httphandle == NULL); ++ INSIST(send_cb == NULL); ++ INSIST(send_cbarg == NULL); ++ ++ if (session->pending_write_data != NULL && session->sending == 0) { ++ (void)http_send_outgoing(session, NULL, NULL, NULL); + return; + } + + if (nghttp2_session_want_read(session->ngsession) != 0) { 
+ if (!session->reading) { + /* We have not yet started reading from this handle */ ++ isc__nmsocket_timer_start(session->handle->sock); + isc_nm_read(session->handle, http_readcb, session); + session->reading = true; + } else if (session->buf != NULL) { + size_t remaining = + isc_buffer_remaininglength(session->buf); + /* Leftover data in the buffer, use it */ +- size_t readlen = nghttp2_session_mem_recv( +- session->ngsession, +- isc_buffer_current(session->buf), remaining); ++ size_t remaining_after = 0; ++ ssize_t readlen = 0; ++ isc_nm_http_session_t *tmpsess = NULL; + +- if (readlen == remaining) { ++ /* ++ * Let's ensure that HTTP/2 session and its associated ++ * data will not go "out of scope" too early. ++ */ ++ isc__nm_httpsession_attach(session, &tmpsess); ++ ++ readlen = http_process_input_data(session, ++ session->buf); ++ ++ remaining_after = ++ isc_buffer_remaininglength(session->buf); ++ ++ if (readlen < 0) { ++ failed_read_cb(ISC_R_UNEXPECTED, session); ++ } else if (http_is_flooding_peer(session)) { ++ http_log_flooding_peer(session); ++ failed_read_cb(ISC_R_RANGE, session); ++ } else if ((size_t)readlen == remaining) { + isc_buffer_free(&session->buf); ++ http_do_bio(session, NULL, NULL, NULL); ++ } else if (remaining_after > 0 && ++ remaining_after < remaining) ++ { ++ /* ++ * We have processed a part of the data, now ++ * let's delay processing of whatever is left ++ * here. We want it to be an async operation so ++ * that we will: ++ * ++ * a) let other things run; ++ * b) have finer grained control over how much ++ * data is processed at once, because nghttp2 ++ * would happily consume as much data we pass to ++ * it and that could overwhelm the server. 
++ */ ++ http_do_bio_async(session); + } else { +- isc_buffer_forward(session->buf, readlen); ++ (void)http_send_outgoing(session, NULL, NULL, ++ NULL); + } + +- http_do_bio(session, send_httphandle, send_cb, +- send_cbarg); ++ isc__nm_httpsession_detach(&tmpsess); + return; + } else { + /* Resume reading, it's idempotent, wait for more */ + isc_nm_resumeread(session->handle); ++ isc__nmsocket_timer_start(session->handle->sock); + } + } else { + /* We don't want more data, stop reading for now */ + isc_nm_pauseread(session->handle); + } + +- if (send_cb != NULL) { +- INSIST(VALID_NMHANDLE(send_httphandle)); +- (void)http_send_outgoing(session, send_httphandle, send_cb, +- send_cbarg); +- } else { +- INSIST(send_httphandle == NULL); +- INSIST(send_cb == NULL); +- INSIST(send_cbarg == NULL); +- (void)http_send_outgoing(session, NULL, NULL, NULL); ++ /* we might have some data to send after processing */ ++ (void)http_send_outgoing(session, NULL, NULL, NULL); ++ ++ if (nghttp2_session_want_read(session->ngsession) == 0 && ++ nghttp2_session_want_write(session->ngsession) == 0 && ++ session->pending_write_data == NULL) ++ { ++ session->closing = true; ++ isc_nm_pauseread(session->handle); ++ if (session->sending == 0) { ++ finish_http_session(session); ++ } + } + + return; + } + ++static void ++http_do_bio_async_cb(void *arg) { ++ isc_nm_http_session_t *session = arg; ++ ++ REQUIRE(VALID_HTTP2_SESSION(session)); ++ ++ if (session->handle != NULL && ++ !isc__nmsocket_closing(session->handle->sock)) ++ { ++ http_do_bio(session, NULL, NULL, NULL); ++ } ++ ++ isc__nm_httpsession_detach(&session); ++} ++ ++static void ++http_do_bio_async(isc_nm_http_session_t *session) { ++ isc_nm_http_session_t *tmpsess = NULL; ++ ++ REQUIRE(VALID_HTTP2_SESSION(session)); ++ ++ if (session->handle == NULL || ++ isc__nmsocket_closing(session->handle->sock)) ++ { ++ return; ++ } ++ isc__nm_httpsession_attach(session, &tmpsess); ++ isc__nm_async_run( ++ 
&session->handle->sock->mgr->workers[session->handle->sock->tid], ++ http_do_bio_async_cb, tmpsess); ++} ++ + static isc_result_t + get_http_cstream(isc_nmsocket_t *sock, http_cstream_t **streamp) { + http_cstream_t *cstream = sock->h2.connect.cstream; +@@ -1417,6 +1787,7 @@ transport_connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + } + + http_transpost_tcp_nodelay(handle); ++ isc__nmhandle_set_manual_timer(session->handle, true); + + http_call_connect_cb(http_sock, session, result); + +@@ -1660,6 +2031,7 @@ server_on_begin_headers_callback(nghttp2_session *ngsession, + socket->tid = session->handle->sock->tid; + ISC_LINK_INIT(&socket->h2, link); + ISC_LIST_APPEND(session->sstreams, &socket->h2, link); ++ session->total_opened_sstreams++; + + nghttp2_session_set_stream_user_data(ngsession, frame->hd.stream_id, + socket); +@@ -1736,6 +2108,8 @@ server_handle_path_header(isc_nmsocket_t *socket, const uint8_t *value, + socket->mgr->mctx, dns_value, + dns_value_len, + &socket->h2.query_data_len); ++ socket->h2.session->processed_useful_data += ++ dns_value_len; + } else { + socket->h2.query_too_large = true; + return (ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE); +@@ -2043,6 +2417,15 @@ server_call_cb(isc_nmsocket_t *socket, isc_nm_http_session_t *session, + + addr = isc_nmhandle_peeraddr(session->handle); + handle = isc__nmhandle_get(socket, &addr, NULL); ++ if (result != ISC_R_SUCCESS) { ++ data = NULL; ++ } else if (socket->h2.session->handle != NULL) { ++ isc__nmsocket_timer_restart(socket->h2.session->handle->sock); ++ } ++ if (result == ISC_R_SUCCESS) { ++ socket->h2.request_received = true; ++ socket->h2.session->received++; ++ } + socket->h2.cb(handle, result, data, socket->h2.cbarg); + isc_nmhandle_detach(&handle); + } +@@ -2058,6 +2441,12 @@ isc__nm_http_bad_request(isc_nmhandle_t *handle) { + REQUIRE(!atomic_load(&sock->client)); + REQUIRE(VALID_HTTP2_SESSION(sock->h2.session)); + ++ if (sock->h2.response_submitted || ++ 
!http_session_active(sock->h2.session)) ++ { ++ return; ++ } ++ + (void)server_send_error_response(ISC_HTTP_ERROR_BAD_REQUEST, + sock->h2.session->ngsession, sock); + } +@@ -2480,6 +2869,8 @@ httplisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + isc__nmsocket_attach(httplistensock, &session->serversocket); + server_send_connection_header(session); + ++ isc__nmhandle_set_manual_timer(session->handle, true); ++ + /* TODO H2 */ + http_do_bio(session, NULL, NULL, NULL); + return (ISC_R_SUCCESS); +diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h +index bc1ba73..3935a4a 100644 +--- a/lib/isc/netmgr/netmgr-int.h ++++ b/lib/isc/netmgr/netmgr-int.h +@@ -337,6 +337,7 @@ typedef enum isc__netievent_type { + netievent_privilegedtask, + + netievent_settlsctx, ++ netievent_asyncrun, + + /* + * event type values higher than this will be treated +@@ -708,6 +709,42 @@ typedef struct isc__netievent__tlsctx { + } + + #ifdef HAVE_LIBNGHTTP2 ++typedef void (*isc__nm_asyncrun_cb_t)(void *); ++ ++typedef struct isc__netievent__asyncrun { ++ isc__netievent_type type; ++ ISC_LINK(isc__netievent_t) link; ++ isc__nm_asyncrun_cb_t cb; ++ void *cbarg; ++} isc__netievent__asyncrun_t; ++ ++#define NETIEVENT_ASYNCRUN_TYPE(type) \ ++ typedef isc__netievent__asyncrun_t isc__netievent_##type##_t; ++ ++#define NETIEVENT_ASYNCRUN_DECL(type) \ ++ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ ++ isc_nm_t *nm, isc__nm_asyncrun_cb_t cb, void *cbarg); \ ++ void isc__nm_put_netievent_##type(isc_nm_t *nm, \ ++ isc__netievent_##type##_t *ievent); ++ ++#define NETIEVENT_ASYNCRUN_DEF(type) \ ++ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ ++ isc_nm_t *nm, isc__nm_asyncrun_cb_t cb, void *cbarg) { \ ++ isc__netievent_##type##_t *ievent = \ ++ isc__nm_get_netievent(nm, netievent_##type); \ ++ ievent->cb = cb; \ ++ ievent->cbarg = cbarg; \ ++ \ ++ return (ievent); \ ++ } \ ++ \ ++ void isc__nm_put_netievent_##type(isc_nm_t *nm, \ ++ 
isc__netievent_##type##_t *ievent) { \ ++ ievent->cb = NULL; \ ++ ievent->cbarg = NULL; \ ++ isc__nm_put_netievent(nm, ievent); \ ++ } ++ + typedef struct isc__netievent__http_eps { + NETIEVENT__SOCKET; + isc_nm_http_endpoints_t *endpoints; +@@ -752,6 +789,7 @@ typedef union { + isc__netievent_tlsconnect_t nitc; + isc__netievent__tlsctx_t nitls; + #ifdef HAVE_LIBNGHTTP2 ++ isc__netievent__asyncrun_t niasync; + isc__netievent__http_eps_t nihttpeps; + #endif /* HAVE_LIBNGHTTP2 */ + } isc__netievent_storage_t; +@@ -948,6 +986,7 @@ typedef struct isc_nmsocket_h2 { + isc_nm_http_endpoints_t **listener_endpoints; + size_t n_listener_endpoints; + ++ bool request_received; + bool response_submitted; + struct { + char *uri; +@@ -1232,6 +1271,7 @@ struct isc_nmsocket { + + isc_barrier_t barrier; + bool barrier_initialised; ++ atomic_bool manual_read_timer; + #ifdef NETMGR_TRACE + void *backtrace[TRACE_SIZE]; + int backtrace_size; +@@ -1550,6 +1590,9 @@ isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout); + * Set the read timeout for the TCP socket associated with 'handle'. + */ + ++void ++isc__nmhandle_tcp_set_manual_timer(isc_nmhandle_t *handle, const bool manual); ++ + void + isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0); + void +@@ -1792,6 +1835,9 @@ isc__nm_tls_cleartimeout(isc_nmhandle_t *handle); + * around. 
+ */ + ++void ++isc__nmhandle_tls_set_manual_timer(isc_nmhandle_t *handle, const bool manual); ++ + const char * + isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle); + +@@ -1809,6 +1855,15 @@ void + isc__nmhandle_tls_setwritetimeout(isc_nmhandle_t *handle, + uint64_t write_timeout); + ++bool ++isc__nmsocket_tls_timer_running(isc_nmsocket_t *sock); ++ ++void ++isc__nmsocket_tls_timer_restart(isc_nmsocket_t *sock); ++ ++void ++isc__nmsocket_tls_timer_stop(isc_nmsocket_t *sock); ++ + void + isc__nm_http_stoplistening(isc_nmsocket_t *sock); + +@@ -1901,7 +1956,10 @@ void + isc__nm_http_set_max_streams(isc_nmsocket_t *listener, + const uint32_t max_concurrent_streams); + +-#endif ++void ++isc__nm_async_asyncrun(isc__networker_t *worker, isc__netievent_t *ev0); ++ ++#endif /* HAVE_LIBNGHTTP2 */ + + void + isc__nm_async_settlsctx(isc__networker_t *worker, isc__netievent_t *ev0); +@@ -2097,6 +2155,8 @@ NETIEVENT_SOCKET_TYPE(tlsdnscycle); + NETIEVENT_SOCKET_REQ_TYPE(httpsend); + NETIEVENT_SOCKET_TYPE(httpclose); + NETIEVENT_SOCKET_HTTP_EPS_TYPE(httpendpoints); ++ ++NETIEVENT_ASYNCRUN_TYPE(asyncrun); + #endif /* HAVE_LIBNGHTTP2 */ + + NETIEVENT_SOCKET_REQ_TYPE(tcpconnect); +@@ -2171,6 +2231,8 @@ NETIEVENT_SOCKET_DECL(tlsdnscycle); + NETIEVENT_SOCKET_REQ_DECL(httpsend); + NETIEVENT_SOCKET_DECL(httpclose); + NETIEVENT_SOCKET_HTTP_EPS_DECL(httpendpoints); ++ ++NETIEVENT_ASYNCRUN_DECL(asyncrun); + #endif /* HAVE_LIBNGHTTP2 */ + + NETIEVENT_SOCKET_REQ_DECL(tcpconnect); +@@ -2287,3 +2349,20 @@ isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult); + + void + isc__nmsocket_log_tls_session_reuse(isc_nmsocket_t *sock, isc_tls_t *tls); ++ ++void ++isc__nmhandle_set_manual_timer(isc_nmhandle_t *handle, const bool manual); ++/* ++ * Set manual read timer control mode - so that it will not get reset ++ * automatically on read nor get started when read is initiated. 
++ */ ++ ++#if HAVE_LIBNGHTTP2 ++void ++isc__nm_async_run(isc__networker_t *worker, isc__nm_asyncrun_cb_t cb, ++ void *cbarg); ++/* ++ * Call the given callback asynchronously by the give network manager ++ * worker, pass the given argument to it. ++ */ ++#endif /* HAVE_LIBNGHTTP2 */ +diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c +index f9e3b70..9167927 100644 +--- a/lib/isc/netmgr/netmgr.c ++++ b/lib/isc/netmgr/netmgr.c +@@ -996,6 +996,8 @@ process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) { + NETIEVENT_CASE(httpsend); + NETIEVENT_CASE(httpclose); + NETIEVENT_CASE(httpendpoints); ++ ++ NETIEVENT_CASE(asyncrun); + #endif + NETIEVENT_CASE(settlsctx); + NETIEVENT_CASE(sockstop); +@@ -1114,6 +1116,8 @@ NETIEVENT_SOCKET_DEF(tlsdnsshutdown); + NETIEVENT_SOCKET_REQ_DEF(httpsend); + NETIEVENT_SOCKET_DEF(httpclose); + NETIEVENT_SOCKET_HTTP_EPS_DEF(httpendpoints); ++ ++NETIEVENT_ASYNCRUN_DEF(asyncrun); + #endif /* HAVE_LIBNGHTTP2 */ + + NETIEVENT_SOCKET_REQ_DEF(tcpconnect); +@@ -1625,6 +1629,7 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, + atomic_init(&sock->keepalive, false); + atomic_init(&sock->connected, false); + atomic_init(&sock->timedout, false); ++ atomic_init(&sock->manual_read_timer, false); + + atomic_init(&sock->active_child_connections, 0); + +@@ -2134,6 +2139,15 @@ void + isc__nmsocket_timer_restart(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + ++ switch (sock->type) { ++#if HAVE_LIBNGHTTP2 ++ case isc_nm_tlssocket: ++ return isc__nmsocket_tls_timer_restart(sock); ++#endif /* HAVE_LIBNGHTTP2 */ ++ default: ++ break; ++ } ++ + if (uv_is_closing((uv_handle_t *)&sock->read_timer)) { + return; + } +@@ -2168,6 +2182,15 @@ bool + isc__nmsocket_timer_running(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + ++ switch (sock->type) { ++#if HAVE_LIBNGHTTP2 ++ case isc_nm_tlssocket: ++ return isc__nmsocket_tls_timer_running(sock); ++#endif /* HAVE_LIBNGHTTP2 */ ++ default: ++ 
break; ++ } ++ + return (uv_is_active((uv_handle_t *)&sock->read_timer)); + } + +@@ -2188,6 +2211,15 @@ isc__nmsocket_timer_stop(isc_nmsocket_t *sock) { + + REQUIRE(VALID_NMSOCK(sock)); + ++ switch (sock->type) { ++#if HAVE_LIBNGHTTP2 ++ case isc_nm_tlssocket: ++ return isc__nmsocket_tls_timer_stop(sock); ++#endif /* HAVE_LIBNGHTTP2 */ ++ default: ++ break; ++ } ++ + /* uv_timer_stop() is idempotent, no need to check if running */ + + r = uv_timer_stop(&sock->read_timer); +@@ -3938,6 +3970,52 @@ isc__nmsocket_log_tls_session_reuse(isc_nmsocket_t *sock, isc_tls_t *tls) { + client_sabuf, local_sabuf); + } + ++void ++isc__nmhandle_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { ++ REQUIRE(VALID_NMHANDLE(handle)); ++ REQUIRE(VALID_NMSOCK(handle->sock)); ++ ++ isc_nmsocket_t *sock = handle->sock; ++ ++ switch (sock->type) { ++ case isc_nm_tcpsocket: ++ isc__nmhandle_tcp_set_manual_timer(handle, manual); ++ return; ++#if HAVE_LIBNGHTTP2 ++ case isc_nm_tlssocket: ++ isc__nmhandle_tls_set_manual_timer(handle, manual); ++ return; ++#endif /* HAVE_LIBNGHTTP2 */ ++ default: ++ break; ++ }; ++ ++ UNREACHABLE(); ++} ++ ++#if HAVE_LIBNGHTTP2 ++void ++isc__nm_async_run(isc__networker_t *worker, isc__nm_asyncrun_cb_t cb, ++ void *cbarg) { ++ isc__netievent__asyncrun_t *ievent = NULL; ++ REQUIRE(worker != NULL); ++ REQUIRE(cb != NULL); ++ ++ ievent = isc__nm_get_netievent_asyncrun(worker->mgr, cb, cbarg); ++ isc__nm_enqueue_ievent(worker, (isc__netievent_t *)ievent); ++} ++ ++void ++isc__nm_async_asyncrun(isc__networker_t *worker, isc__netievent_t *ev0) { ++ isc__netievent_asyncrun_t *ievent = (isc__netievent_asyncrun_t *)ev0; ++ ++ UNUSED(worker); ++ ++ ievent->cb(ievent->cbarg); ++} ++ ++#endif /* HAVE_LIBNGHTTP2 */ ++ + #ifdef NETMGR_TRACE + /* + * Dump all active sockets in netmgr. 
We output to stderr +diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c +index 37d44bd..925bc85 100644 +--- a/lib/isc/netmgr/tcp.c ++++ b/lib/isc/netmgr/tcp.c +@@ -784,7 +784,9 @@ isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0) { + return; + } + +- isc__nmsocket_timer_start(sock); ++ if (!atomic_load(&sock->manual_read_timer)) { ++ isc__nmsocket_timer_start(sock); ++ } + } + + void +@@ -822,7 +824,9 @@ isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0) { + REQUIRE(sock->tid == isc_nm_tid()); + UNUSED(worker); + +- isc__nmsocket_timer_stop(sock); ++ if (!atomic_load(&sock->manual_read_timer)) { ++ isc__nmsocket_timer_stop(sock); ++ } + isc__nm_stop_reading(sock); + } + +@@ -931,8 +935,10 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { + } + } + +- /* The timer will be updated */ +- isc__nmsocket_timer_restart(sock); ++ if (!atomic_load(&sock->manual_read_timer)) { ++ /* The timer will be updated */ ++ isc__nmsocket_timer_restart(sock); ++ } + } + + free: +@@ -1521,3 +1527,15 @@ isc__nm_tcp_listener_nactive(isc_nmsocket_t *listener) { + INSIST(nactive >= 0); + return (nactive); + } ++ ++void ++isc__nmhandle_tcp_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { ++ isc_nmsocket_t *sock; ++ ++ REQUIRE(VALID_NMHANDLE(handle)); ++ sock = handle->sock; ++ REQUIRE(VALID_NMSOCK(sock)); ++ REQUIRE(sock->type == isc_nm_tcpsocket); ++ ++ atomic_store(&sock->manual_read_timer, manual); ++} +diff --git a/lib/isc/netmgr/tlsstream.c b/lib/isc/netmgr/tlsstream.c +index a3fc6d2..c7137a0 100644 +--- a/lib/isc/netmgr/tlsstream.c ++++ b/lib/isc/netmgr/tlsstream.c +@@ -60,6 +60,12 @@ tls_error_to_result(const int tls_err, const int tls_state, isc_tls_t *tls) { + } + } + ++static void ++tls_read_start(isc_nmsocket_t *sock); ++ ++static void ++tls_read_stop(isc_nmsocket_t *sock); ++ + static void + tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result); + +@@ -203,8 +209,13 
@@ tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result) { + tls_call_connect_cb(sock, handle, result); + isc__nmsocket_clearcb(sock); + isc_nmhandle_detach(&handle); +- } else if (sock->recv_cb != NULL && sock->statichandle != NULL && +- (sock->recv_read || result == ISC_R_TIMEDOUT)) ++ goto do_destroy; ++ } ++ ++ isc__nmsocket_timer_stop(sock); ++ ++ if (sock->recv_cb != NULL && sock->statichandle != NULL && ++ (sock->recv_read || result == ISC_R_TIMEDOUT)) + { + isc__nm_uvreq_t *req = NULL; + INSIST(VALID_NMHANDLE(sock->statichandle)); +@@ -218,13 +229,13 @@ tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result) { + } + isc__nm_readcb(sock, req, result); + if (result == ISC_R_TIMEDOUT && +- (sock->outerhandle == NULL || +- isc__nmsocket_timer_running(sock->outerhandle->sock))) ++ isc__nmsocket_timer_running(sock)) + { + destroy = false; + } + } + ++do_destroy: + if (destroy) { + isc__nmsocket_prep_destroy(sock); + } +@@ -344,6 +355,8 @@ tls_try_handshake(isc_nmsocket_t *sock, isc_result_t *presult) { + INSIST(sock->statichandle == NULL); + isc__nmsocket_log_tls_session_reuse(sock, sock->tlsstream.tls); + tlshandle = isc__nmhandle_get(sock, &sock->peer, &sock->iface); ++ isc__nmsocket_timer_stop(sock); ++ tls_read_stop(sock); + + if (isc__nm_closing(sock)) { + result = ISC_R_SHUTTINGDOWN; +@@ -437,6 +450,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, + sock->tlsstream.state = TLS_HANDSHAKE; + rv = tls_try_handshake(sock, NULL); + INSIST(SSL_is_init_finished(sock->tlsstream.tls) == 0); ++ isc__nmsocket_timer_restart(sock); + } else if (sock->tlsstream.state == TLS_CLOSED) { + return; + } else { /* initialised and doing I/O */ +@@ -502,6 +516,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, + !atomic_load(&sock->readpaused) && + sock->statichandle != NULL && !finish) + { ++ bool was_new_data = false; + uint8_t recv_buf[TLS_BUF_SIZE]; + INSIST(sock->tlsstream.state > TLS_HANDSHAKE); + while ((rv = 
SSL_read_ex(sock->tlsstream.tls, recv_buf, +@@ -510,7 +525,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, + isc_region_t region; + region = (isc_region_t){ .base = &recv_buf[0], + .length = len }; +- ++ was_new_data = true; + INSIST(VALID_NMHANDLE(sock->statichandle)); + sock->recv_cb(sock->statichandle, ISC_R_SUCCESS, + &region, sock->recv_cbarg); +@@ -547,8 +562,29 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, + break; + } + } ++ ++ if (was_new_data && !sock->manual_read_timer) { ++ /* ++ * Some data has been decrypted, it is the right ++ * time to stop the read timer as it will be ++ * restarted on the next read attempt. ++ */ ++ isc__nmsocket_timer_stop(sock); ++ } + } + } ++ ++ /* ++ * Setting 'finish' to 'true' means that we are about to close the ++ * TLS stream (we intend to send TLS shutdown message to the ++ * remote side). After that no new data can be received, so we ++ * should stop the timer regardless of the ++ * 'sock->manual_read_timer' value. 
++ */ ++ if (finish) { ++ isc__nmsocket_timer_stop(sock); ++ } ++ + errno = 0; + tls_status = SSL_get_error(sock->tlsstream.tls, rv); + saved_errno = errno; +@@ -601,14 +637,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, + return; + } + +- INSIST(VALID_NMHANDLE(sock->outerhandle)); +- +- if (sock->tlsstream.reading) { +- isc_nm_resumeread(sock->outerhandle); +- } else if (sock->tlsstream.state == TLS_HANDSHAKE) { +- sock->tlsstream.reading = true; +- isc_nm_read(sock->outerhandle, tls_readcb, sock); +- } ++ tls_read_start(sock); + return; + default: + result = tls_error_to_result(tls_status, sock->tlsstream.state, +@@ -742,6 +771,7 @@ tlslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + RUNTIME_CHECK(result == ISC_R_SUCCESS); + /* TODO: catch failure code, detach tlssock, and log the error */ + ++ isc__nmhandle_set_manual_timer(tlssock->outerhandle, true); + tls_do_bio(tlssock, NULL, NULL, false); + return (result); + } +@@ -897,6 +927,29 @@ isc__nm_tls_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { + (isc__netievent_t *)ievent); + } + ++static void ++tls_read_start(isc_nmsocket_t *sock) { ++ INSIST(VALID_NMHANDLE(sock->outerhandle)); ++ ++ if (sock->tlsstream.reading) { ++ isc_nm_resumeread(sock->outerhandle); ++ } else if (sock->tlsstream.state == TLS_HANDSHAKE) { ++ sock->tlsstream.reading = true; ++ isc_nm_read(sock->outerhandle, tls_readcb, sock); ++ } ++ ++ if (!sock->manual_read_timer) { ++ isc__nmsocket_timer_start(sock); ++ } ++} ++ ++static void ++tls_read_stop(isc_nmsocket_t *sock) { ++ if (sock->outerhandle != NULL) { ++ isc_nm_pauseread(sock->outerhandle); ++ } ++} ++ + void + isc__nm_tls_pauseread(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); +@@ -905,9 +958,11 @@ isc__nm_tls_pauseread(isc_nmhandle_t *handle) { + if (atomic_compare_exchange_strong(&handle->sock->readpaused, + &(bool){ false }, true)) + { +- if (handle->sock->outerhandle != NULL) { +- 
isc_nm_pauseread(handle->sock->outerhandle); ++ if (!atomic_load(&handle->sock->manual_read_timer)) { ++ isc__nmsocket_timer_stop(handle->sock); + } ++ ++ tls_read_stop(handle->sock); + } + } + +@@ -936,6 +991,7 @@ tls_close_direct(isc_nmsocket_t *sock) { + * external references, we can close everything. + */ + if (sock->outerhandle != NULL) { ++ isc__nmsocket_timer_stop(sock); + isc_nm_pauseread(sock->outerhandle); + isc__nmsocket_clearcb(sock->outerhandle->sock); + isc_nmhandle_detach(&sock->outerhandle); +@@ -1080,6 +1136,7 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { + */ + handle->sock->tlsstream.tlssocket = tlssock; + ++ isc__nmhandle_set_manual_timer(tlssock->outerhandle, true); + tls_do_bio(tlssock, NULL, NULL, false); + return; + error: +@@ -1246,6 +1303,44 @@ isc__nmhandle_tls_setwritetimeout(isc_nmhandle_t *handle, + } + } + ++bool ++isc__nmsocket_tls_timer_running(isc_nmsocket_t *sock) { ++ REQUIRE(VALID_NMSOCK(sock)); ++ REQUIRE(sock->type == isc_nm_tlssocket); ++ ++ if (sock->outerhandle != NULL) { ++ INSIST(VALID_NMHANDLE(sock->outerhandle)); ++ REQUIRE(VALID_NMSOCK(sock->outerhandle->sock)); ++ return isc__nmsocket_timer_running(sock->outerhandle->sock); ++ } ++ ++ return false; ++} ++ ++void ++isc__nmsocket_tls_timer_restart(isc_nmsocket_t *sock) { ++ REQUIRE(VALID_NMSOCK(sock)); ++ REQUIRE(sock->type == isc_nm_tlssocket); ++ ++ if (sock->outerhandle != NULL) { ++ INSIST(VALID_NMHANDLE(sock->outerhandle)); ++ REQUIRE(VALID_NMSOCK(sock->outerhandle->sock)); ++ isc__nmsocket_timer_restart(sock->outerhandle->sock); ++ } ++} ++ ++void ++isc__nmsocket_tls_timer_stop(isc_nmsocket_t *sock) { ++ REQUIRE(VALID_NMSOCK(sock)); ++ REQUIRE(sock->type == isc_nm_tlssocket); ++ ++ if (sock->outerhandle != NULL) { ++ INSIST(VALID_NMHANDLE(sock->outerhandle)); ++ REQUIRE(VALID_NMSOCK(sock->outerhandle->sock)); ++ isc__nmsocket_timer_stop(sock->outerhandle->sock); ++ } ++} ++ + const char * + 
isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle) { + isc_nmsocket_t *sock = NULL; +@@ -1346,3 +1441,15 @@ tls_try_shutdown(isc_tls_t *tls, const bool force) { + (void)SSL_shutdown(tls); + } + } ++ ++void ++isc__nmhandle_tls_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { ++ isc_nmsocket_t *sock; ++ ++ REQUIRE(VALID_NMHANDLE(handle)); ++ sock = handle->sock; ++ REQUIRE(VALID_NMSOCK(sock)); ++ REQUIRE(sock->type == isc_nm_tlssocket); ++ ++ atomic_store(&sock->manual_read_timer, manual); ++} +-- +2.33.0 + diff --git a/backport-CVE-2024-1737.patch b/backport-CVE-2024-1737.patch new file mode 100644 index 0000000000000000000000000000000000000000..c831867ad44d401ba6ffb11c49c4db222dc11114 --- /dev/null +++ b/backport-CVE-2024-1737.patch @@ -0,0 +1,1500 @@ +From 39d3e2a8ecc1cb4dccefa3ddea477a2887989485 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= +Date: Sat, 25 May 2024 11:46:56 +0200 +Subject: [PATCH] Add a limit to the number of RR types for single name + +Previously, the number of RR types for a single owner name was limited +only by the maximum number of the types (64k). As the data structure +that holds the RR types for the database node is just a linked list, and +there are places where we just walk through the whole list (again and +again), adding a large number of RR types for a single owner named with +would slow down processing of such name (database node). + +Add a configurable limit to cap the number of the RR types for a single +owner. This is enforced at the database (rbtdb, qpzone, qpcache) level +and configured with new max-types-per-name configuration option that +can be configured globally, per-view and per-zone. 
+ +(cherry picked from commit 00d16211d6368b99f070c1182d8c76b3798ca1db) + +Conflict:Adaptation of the dns_db_settask Function Context +Reference:https://downloads.isc.org/isc/bind9/9.18.28/patches/0002-CVE-2024-1737.patch + + +--- + bin/named/config.c | 2 + + bin/named/server.c | 18 ++++ + bin/named/zoneconf.c | 16 +++ + bin/tests/system/doth/ns2/named.conf.in | 1 + + bin/tests/system/doth/ns3/named.conf.in | 1 + + bin/tests/system/doth/ns4/named.conf.in | 1 + + bin/tests/system/doth/ns5/named.conf.in | 1 + + bin/tests/system/dyndb/driver/db.c | 69 ++++++++---- + doc/arm/reference.rst | 30 ++++++ + doc/misc/mirror.zoneopt | 2 + + doc/misc/options | 4 + + doc/misc/primary.zoneopt | 2 + + doc/misc/redirect.zoneopt | 2 + + doc/misc/secondary.zoneopt | 2 + + doc/misc/static-stub.zoneopt | 2 + + doc/misc/stub.zoneopt | 2 + + lib/dns/cache.c | 24 +++++ + lib/dns/db.c | 18 ++++ + lib/dns/dnsrps.c | 2 + + lib/dns/include/dns/cache.h | 12 +++ + lib/dns/include/dns/db.h | 19 ++++ + lib/dns/include/dns/rdataslab.h | 6 +- + lib/dns/include/dns/view.h | 14 +++ + lib/dns/include/dns/zone.h | 39 +++++++ + lib/dns/rbtdb.c | 138 +++++++++++++++++++++--- + lib/dns/rdataslab.c | 14 ++- + lib/dns/sdb.c | 46 +++++--- + lib/dns/sdlz.c | 79 +++++++++----- + lib/dns/view.c | 21 ++++ + lib/dns/xfrin.c | 24 +---- + lib/dns/zone.c | 96 +++++++++++++---- + lib/isccfg/namedconf.c | 6 ++ + lib/ns/update.c | 15 ++- + 33 files changed, 602 insertions(+), 126 deletions(-) + +diff --git a/bin/named/config.c b/bin/named/config.c +index f95e433..af8637e 100644 +--- a/bin/named/config.c ++++ b/bin/named/config.c +@@ -233,8 +233,10 @@ options {\n\ + ixfr-from-differences false;\n\ + max-journal-size default;\n\ + max-records 0;\n\ ++ max-records-per-type 100;\n\ + max-refresh-time 2419200; /* 4 weeks */\n\ + max-retry-time 1209600; /* 2 weeks */\n\ ++ max-types-per-name 100;\n\ + max-transfer-idle-in 60;\n\ + max-transfer-idle-out 60;\n\ + max-transfer-time-in 120;\n\ +diff --git a/bin/named/server.c 
b/bin/named/server.c +index bfe6df3..8d7f56e 100644 +--- a/bin/named/server.c ++++ b/bin/named/server.c +@@ -5563,6 +5563,24 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, + dns_resolver_setclientsperquery(view->resolver, cfg_obj_asuint32(obj), + max_clients_per_query); + ++ /* ++ * This is used for the cache and also as a default value ++ * for zone databases. ++ */ ++ obj = NULL; ++ result = named_config_get(maps, "max-records-per-type", &obj); ++ INSIST(result == ISC_R_SUCCESS); ++ dns_view_setmaxrrperset(view, cfg_obj_asuint32(obj)); ++ ++ /* ++ * This is used for the cache and also as a default value ++ * for zone databases. ++ */ ++ obj = NULL; ++ result = named_config_get(maps, "max-types-per-name", &obj); ++ INSIST(result == ISC_R_SUCCESS); ++ dns_view_setmaxtypepername(view, cfg_obj_asuint32(obj)); ++ + obj = NULL; + result = named_config_get(maps, "max-recursion-depth", &obj); + INSIST(result == ISC_R_SUCCESS); +diff --git a/bin/named/zoneconf.c b/bin/named/zoneconf.c +index 44c2242..384a81e 100644 +--- a/bin/named/zoneconf.c ++++ b/bin/named/zoneconf.c +@@ -1083,6 +1083,22 @@ named_zone_configure(const cfg_obj_t *config, const cfg_obj_t *vconfig, + dns_zone_setmaxrecords(zone, 0); + } + ++ obj = NULL; ++ result = named_config_get(maps, "max-records-per-type", &obj); ++ INSIST(result == ISC_R_SUCCESS && obj != NULL); ++ dns_zone_setmaxrrperset(mayberaw, cfg_obj_asuint32(obj)); ++ if (zone != mayberaw) { ++ dns_zone_setmaxrrperset(zone, 0); ++ } ++ ++ obj = NULL; ++ result = named_config_get(maps, "max-types-per-name", &obj); ++ INSIST(result == ISC_R_SUCCESS && obj != NULL); ++ dns_zone_setmaxtypepername(mayberaw, cfg_obj_asuint32(obj)); ++ if (zone != mayberaw) { ++ dns_zone_setmaxtypepername(zone, 0); ++ } ++ + if (raw != NULL && filename != NULL) { + #define SIGNED ".signed" + size_t signedlen = strlen(filename) + sizeof(SIGNED); +diff --git a/bin/tests/system/doth/ns2/named.conf.in 
b/bin/tests/system/doth/ns2/named.conf.in +index e533f47..f10dac5 100644 +--- a/bin/tests/system/doth/ns2/named.conf.in ++++ b/bin/tests/system/doth/ns2/named.conf.in +@@ -49,6 +49,7 @@ options { + ixfr-from-differences yes; + check-integrity no; + dnssec-validation yes; ++ max-records-per-type 0; + transfers-in 100; + transfers-out 100; + }; +diff --git a/bin/tests/system/doth/ns3/named.conf.in b/bin/tests/system/doth/ns3/named.conf.in +index cd1ab9c..cd9fc63 100644 +--- a/bin/tests/system/doth/ns3/named.conf.in ++++ b/bin/tests/system/doth/ns3/named.conf.in +@@ -44,6 +44,7 @@ options { + ixfr-from-differences yes; + check-integrity no; + dnssec-validation yes; ++ max-records-per-type 0; + }; + + zone "." { +diff --git a/bin/tests/system/doth/ns4/named.conf.in b/bin/tests/system/doth/ns4/named.conf.in +index c7c6c91..43b7c78 100644 +--- a/bin/tests/system/doth/ns4/named.conf.in ++++ b/bin/tests/system/doth/ns4/named.conf.in +@@ -52,6 +52,7 @@ options { + ixfr-from-differences yes; + check-integrity no; + dnssec-validation yes; ++ max-records-per-type 0; + }; + + zone "." { +diff --git a/bin/tests/system/doth/ns5/named.conf.in b/bin/tests/system/doth/ns5/named.conf.in +index 6808618..9323637 100644 +--- a/bin/tests/system/doth/ns5/named.conf.in ++++ b/bin/tests/system/doth/ns5/named.conf.in +@@ -40,6 +40,7 @@ options { + ixfr-from-differences yes; + check-integrity no; + dnssec-validation yes; ++ max-records-per-type 0; + }; + + zone "." { +diff --git a/bin/tests/system/dyndb/driver/db.c b/bin/tests/system/dyndb/driver/db.c +index 334fd54..d34d1e0 100644 +--- a/bin/tests/system/dyndb/driver/db.c ++++ b/bin/tests/system/dyndb/driver/db.c +@@ -563,28 +563,57 @@ hashsize(dns_db_t *db) { + * determine which implementation of dns_db_*() function to call. 
+ */ + static dns_dbmethods_t sampledb_methods = { +- attach, detach, beginload, +- endload, dump, currentversion, +- newversion, attachversion, closeversion, +- findnode, find, findzonecut, +- attachnode, detachnode, expirenode, +- printnode, createiterator, findrdataset, +- allrdatasets, addrdataset, subtractrdataset, +- deleterdataset, issecure, nodecount, +- ispersistent, overmem, settask, +- getoriginnode, transfernode, getnsec3parameters, +- findnsec3node, setsigningtime, getsigningtime, +- resigned, isdnssec, getrrsetstats, ++ attach, ++ detach, ++ beginload, ++ endload, ++ dump, ++ currentversion, ++ newversion, ++ attachversion, ++ closeversion, ++ findnode, ++ find, ++ findzonecut, ++ attachnode, ++ detachnode, ++ expirenode, ++ printnode, ++ createiterator, ++ findrdataset, ++ allrdatasets, ++ addrdataset, ++ subtractrdataset, ++ deleterdataset, ++ issecure, ++ nodecount, ++ ispersistent, ++ overmem, ++ settask, ++ getoriginnode, ++ transfernode, ++ getnsec3parameters, ++ findnsec3node, ++ setsigningtime, ++ getsigningtime, ++ resigned, ++ isdnssec, ++ getrrsetstats, + NULL, /* rpz_attach */ + NULL, /* rpz_ready */ +- findnodeext, findext, setcachestats, +- hashsize, NULL, /* nodefullname */ +- NULL, /* getsize */ +- NULL, /* setservestalettl */ +- NULL, /* getservestalettl */ +- NULL, /* setservestalerefresh */ +- NULL, /* getservestalerefresh */ +- NULL, /* setgluecachestats */ ++ findnodeext, ++ findext, ++ setcachestats, ++ hashsize, ++ NULL, /* nodefullname */ ++ NULL, /* getsize */ ++ NULL, /* setservestalettl */ ++ NULL, /* getservestalettl */ ++ NULL, /* setservestalerefresh */ ++ NULL, /* getservestalerefresh */ ++ NULL, /* setgluecachestats */ ++ NULL, /* setmaxrrperset */ ++ NULL /* setmaxtypepername */ + }; + + /* Auxiliary driver functions. */ +diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst +index e1b8228..29e246b 100644 +--- a/doc/arm/reference.rst ++++ b/doc/arm/reference.rst +@@ -3766,6 +3766,36 @@ system. 
+ This sets the maximum number of records permitted in a zone. The default is + zero, which means the maximum is unlimited. + ++.. namedconf:statement:: max-records-per-type ++ :tags: server ++ :short: Sets the maximum number of records that can be stored in an RRset ++ ++ This sets the maximum number of resource records that can be stored ++ in an RRset in a database. When configured in :namedconf:ref:`options` ++ or :namedconf:ref:`view`, it controls the cache database; it also sets ++ the default value for zone databases, which can be overridden by setting ++ it at the :namedconf:ref:`zone` level. ++ ++ If set to a positive value, any attempt to cache or to add to a zone ++ an RRset with more than the specified number of records will result in ++ a failure. If set to 0, there is no cap on RRset size. The default is ++ 100. ++ ++.. namedconf:statement:: max-types-per-name ++ :tags: server ++ :short: Sets the maximum number of RR types that can be stored for an owner name ++ ++ This sets the maximum number of resource record types that can be stored ++ for a single owner name in a database. When configured in :namedconf:ref:`options` ++ or :namedconf:ref:`view`, it controls the cache database, and also sets ++ the default value for zone databases, which can be overridden by setting ++ it at the :namedconf:ref:`zone` level. ++ ++ If set to a positive value, any attempt to cache or to add to a zone an owner ++ name with more than the specified number of resource record types will result ++ in a failure. If set to 0, there is no cap on the number of RR types. The default is ++ 100. ++ + .. namedconf:statement:: recursive-clients + :tags: query + :short: Specifies the maximum number of concurrent recursive queries the server can perform. 
+diff --git a/doc/misc/mirror.zoneopt b/doc/misc/mirror.zoneopt +index ac371cd..5f688ca 100644 +--- a/doc/misc/mirror.zoneopt ++++ b/doc/misc/mirror.zoneopt +@@ -18,12 +18,14 @@ zone [ ] { + max-ixfr-ratio ( unlimited | ); + max-journal-size ( default | unlimited | ); + max-records ; ++ max-records-per-type ; + max-refresh-time ; + max-retry-time ; + max-transfer-idle-in ; + max-transfer-idle-out ; + max-transfer-time-in ; + max-transfer-time-out ; ++ max-types-per-name ; + min-refresh-time ; + min-retry-time ; + multi-master ; +diff --git a/doc/misc/options b/doc/misc/options +index a916701..5fe415d 100644 +--- a/doc/misc/options ++++ b/doc/misc/options +@@ -180,6 +180,7 @@ options { + max-journal-size ( default | unlimited | ); + max-ncache-ttl ; + max-records ; ++ max-records-per-type ; + max-recursion-depth ; + max-recursion-queries ; + max-refresh-time ; +@@ -190,6 +191,7 @@ options { + max-transfer-idle-out ; + max-transfer-time-in ; + max-transfer-time-out ; ++ max-types-per-name ; + max-udp-size ; + max-zone-ttl ( unlimited | ); + memstatistics ; +@@ -470,6 +472,7 @@ view [ ] { + max-journal-size ( default | unlimited | ); + max-ncache-ttl ; + max-records ; ++ max-records-per-type ; + max-recursion-depth ; + max-recursion-queries ; + max-refresh-time ; +@@ -479,6 +482,7 @@ view [ ] { + max-transfer-idle-out ; + max-transfer-time-in ; + max-transfer-time-out ; ++ max-types-per-name ; + max-udp-size ; + max-zone-ttl ( unlimited | ); + message-compression ; +diff --git a/doc/misc/primary.zoneopt b/doc/misc/primary.zoneopt +index 8f646e3..1de2f21 100644 +--- a/doc/misc/primary.zoneopt ++++ b/doc/misc/primary.zoneopt +@@ -38,8 +38,10 @@ zone [ ] { + max-ixfr-ratio ( unlimited | ); + max-journal-size ( default | unlimited | ); + max-records ; ++ max-records-per-type ; + max-transfer-idle-out ; + max-transfer-time-out ; ++ max-types-per-name ; + max-zone-ttl ( unlimited | ); + notify ( explicit | master-only | primary-only | ); + notify-delay ; +diff --git 
a/doc/misc/redirect.zoneopt b/doc/misc/redirect.zoneopt +index bcd9a57..9d238c1 100644 +--- a/doc/misc/redirect.zoneopt ++++ b/doc/misc/redirect.zoneopt +@@ -7,6 +7,8 @@ zone [ ] { + masterfile-format ( raw | text ); + masterfile-style ( full | relative ); + max-records ; ++ max-records-per-type ; ++ max-types-per-name ; + max-zone-ttl ( unlimited | ); + primaries [ port ] { ( | [ port ] | [ port ] ) [ key ] [ tls ]; ... }; + zone-statistics ( full | terse | none | ); +diff --git a/doc/misc/secondary.zoneopt b/doc/misc/secondary.zoneopt +index 3237aab..169fa9b 100644 +--- a/doc/misc/secondary.zoneopt ++++ b/doc/misc/secondary.zoneopt +@@ -30,12 +30,14 @@ zone [ ] { + max-ixfr-ratio ( unlimited | ); + max-journal-size ( default | unlimited | ); + max-records ; ++ max-records-per-type ; + max-refresh-time ; + max-retry-time ; + max-transfer-idle-in ; + max-transfer-idle-out ; + max-transfer-time-in ; + max-transfer-time-out ; ++ max-types-per-name ; + min-refresh-time ; + min-retry-time ; + multi-master ; +diff --git a/doc/misc/static-stub.zoneopt b/doc/misc/static-stub.zoneopt +index 5357528..93a3220 100644 +--- a/doc/misc/static-stub.zoneopt ++++ b/doc/misc/static-stub.zoneopt +@@ -5,6 +5,8 @@ zone [ ] { + forward ( first | only ); + forwarders [ port ] { ( | ) [ port ]; ... }; + max-records ; ++ max-records-per-type ; ++ max-types-per-name ; + server-addresses { ( | ); ... }; + server-names { ; ... 
}; + zone-statistics ( full | terse | none | ); +diff --git a/doc/misc/stub.zoneopt b/doc/misc/stub.zoneopt +index 29c1d56..2834682 100644 +--- a/doc/misc/stub.zoneopt ++++ b/doc/misc/stub.zoneopt +@@ -12,10 +12,12 @@ zone [ ] { + masterfile-format ( raw | text ); + masterfile-style ( full | relative ); + max-records ; ++ max-records-per-type ; + max-refresh-time ; + max-retry-time ; + max-transfer-idle-in ; + max-transfer-time-in ; ++ max-types-per-name ; + min-refresh-time ; + min-retry-time ; + multi-master ; +diff --git a/lib/dns/cache.c b/lib/dns/cache.c +index 7ffb6f8..782cf2b 100644 +--- a/lib/dns/cache.c ++++ b/lib/dns/cache.c +@@ -144,6 +144,8 @@ struct dns_cache { + dns_ttl_t serve_stale_ttl; + dns_ttl_t serve_stale_refresh; + isc_stats_t *stats; ++ uint32_t maxrrperset; ++ uint32_t maxtypepername; + }; + + /*** +@@ -175,6 +177,8 @@ cache_create_db(dns_cache_t *cache, dns_db_t **db) { + if (result == ISC_R_SUCCESS) { + dns_db_setservestalettl(*db, cache->serve_stale_ttl); + dns_db_setservestalerefresh(*db, cache->serve_stale_refresh); ++ dns_db_setmaxrrperset(*db, cache->maxrrperset); ++ dns_db_setmaxtypepername(*db, cache->maxtypepername); + } + return (result); + } +@@ -1194,6 +1198,26 @@ dns_cache_updatestats(dns_cache_t *cache, isc_result_t result) { + } + } + ++void ++dns_cache_setmaxrrperset(dns_cache_t *cache, uint32_t value) { ++ REQUIRE(VALID_CACHE(cache)); ++ ++ cache->maxrrperset = value; ++ if (cache->db != NULL) { ++ dns_db_setmaxrrperset(cache->db, value); ++ } ++} ++ ++void ++dns_cache_setmaxtypepername(dns_cache_t *cache, uint32_t value) { ++ REQUIRE(VALID_CACHE(cache)); ++ ++ cache->maxtypepername = value; ++ if (cache->db != NULL) { ++ dns_db_setmaxtypepername(cache->db, value); ++ } ++} ++ + /* + * XXX: Much of the following code has been copied in from statschannel.c. 
+ * We should refactor this into a generic function in stats.c that can be +diff --git a/lib/dns/db.c b/lib/dns/db.c +index c95d19a..85f6daa 100644 +--- a/lib/dns/db.c ++++ b/lib/dns/db.c +@@ -1119,3 +1119,21 @@ dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats) { + + return (ISC_R_NOTIMPLEMENTED); + } ++ ++void ++dns_db_setmaxrrperset(dns_db_t *db, uint32_t value) { ++ REQUIRE(DNS_DB_VALID(db)); ++ ++ if (db->methods->setmaxrrperset != NULL) { ++ (db->methods->setmaxrrperset)(db, value); ++ } ++} ++ ++void ++dns_db_setmaxtypepername(dns_db_t *db, uint32_t value) { ++ REQUIRE(DNS_DB_VALID(db)); ++ ++ if (db->methods->setmaxtypepername != NULL) { ++ (db->methods->setmaxtypepername)(db, value); ++ } ++} +diff --git a/lib/dns/dnsrps.c b/lib/dns/dnsrps.c +index d4a1c65..73f11da 100644 +--- a/lib/dns/dnsrps.c ++++ b/lib/dns/dnsrps.c +@@ -975,6 +975,8 @@ static dns_dbmethods_t rpsdb_db_methods = { + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ + NULL, /* setgluecachestats */ ++ NULL, /* setmaxrrperset */ ++ NULL /* setmaxtypepername */ + }; + + static dns_rdatasetmethods_t rpsdb_rdataset_methods = { +diff --git a/lib/dns/include/dns/cache.h b/lib/dns/include/dns/cache.h +index 8fc9657..91e94c0 100644 +--- a/lib/dns/include/dns/cache.h ++++ b/lib/dns/include/dns/cache.h +@@ -283,6 +283,18 @@ dns_cache_updatestats(dns_cache_t *cache, isc_result_t result); + * Update cache statistics based on result code in 'result' + */ + ++void ++dns_cache_setmaxrrperset(dns_cache_t *cache, uint32_t value); ++/*%< ++ * Set the maximum resource records per RRSet that can be cached. ++ */ ++ ++void ++dns_cache_setmaxtypepername(dns_cache_t *cache, uint32_t value); ++/*%< ++ * Set the maximum resource record types per owner name that can be cached. 
++ */ ++ + #ifdef HAVE_LIBXML2 + int + dns_cache_renderxml(dns_cache_t *cache, void *writer0); +diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h +index 9b53f04..b6e826b 100644 +--- a/lib/dns/include/dns/db.h ++++ b/lib/dns/include/dns/db.h +@@ -185,6 +185,8 @@ typedef struct dns_dbmethods { + isc_result_t (*setservestalerefresh)(dns_db_t *db, uint32_t interval); + isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval); + isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats); ++ void (*setmaxrrperset)(dns_db_t *db, uint32_t value); ++ void (*setmaxtypepername)(dns_db_t *db, uint32_t value); + } dns_dbmethods_t; + + typedef isc_result_t (*dns_dbcreatefunc_t)(isc_mem_t *mctx, +@@ -1756,4 +1758,21 @@ dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats); + * dns_rdatasetstats_create(); otherwise NULL. + */ + ++void ++dns_db_setmaxrrperset(dns_db_t *db, uint32_t value); ++/*%< ++ * Set the maximum permissible number of RRs per RRset. If 'value' ++ * is nonzero, then any subsequent attempt to add an rdataset with ++ * more than 'value' RRs will return ISC_R_NOSPACE. ++ */ ++ ++void ++dns_db_setmaxtypepername(dns_db_t *db, uint32_t value); ++/*%< ++ * Set the maximum permissible number of RR types per owner name. ++ * ++ * If 'value' is nonzero, then any subsequent attempt to add an rdataset with a ++ * RR type that would exceed the number of already stored RR types will return ++ * ISC_R_NOSPACE. ++ */ + ISC_LANG_ENDDECLS +diff --git a/lib/dns/include/dns/rdataslab.h b/lib/dns/include/dns/rdataslab.h +index 7364b8d..5729c00 100644 +--- a/lib/dns/include/dns/rdataslab.h ++++ b/lib/dns/include/dns/rdataslab.h +@@ -66,7 +66,8 @@ ISC_LANG_BEGINDECLS + + isc_result_t + dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, +- isc_region_t *region, unsigned int reservelen); ++ isc_region_t *region, unsigned int reservelen, ++ uint32_t limit); + /*%< + * Slabify a rdataset. 
The slab area will be allocated and returned + * in 'region'. +@@ -122,7 +123,8 @@ isc_result_t + dns_rdataslab_merge(unsigned char *oslab, unsigned char *nslab, + unsigned int reservelen, isc_mem_t *mctx, + dns_rdataclass_t rdclass, dns_rdatatype_t type, +- unsigned int flags, unsigned char **tslabp); ++ unsigned int flags, uint32_t maxrrperset, ++ unsigned char **tslabp); + /*%< + * Merge 'oslab' and 'nslab'. + */ +diff --git a/lib/dns/include/dns/view.h b/lib/dns/include/dns/view.h +index 18b0b33..516c209 100644 +--- a/lib/dns/include/dns/view.h ++++ b/lib/dns/include/dns/view.h +@@ -191,6 +191,8 @@ struct dns_view { + dns_dlzdblist_t dlz_unsearched; + uint32_t fail_ttl; + dns_badcache_t *failcache; ++ uint32_t maxrrperset; ++ uint32_t maxtypepername; + + /* + * Configurable data for server use only, +@@ -1413,4 +1415,16 @@ dns_view_sfd_find(dns_view_t *view, const dns_name_t *name, + *\li 'foundname' to be valid with a buffer sufficient to hold the name. + */ + ++void ++dns_view_setmaxrrperset(dns_view_t *view, uint32_t value); ++/*%< ++ * Set the maximum resource records per RRSet that can be cached. ++ */ ++ ++void ++dns_view_setmaxtypepername(dns_view_t *view, uint32_t value); ++/*%< ++ * Set the maximum resource record types per owner name that can be cached. ++ */ ++ + ISC_LANG_ENDDECLS +diff --git a/lib/dns/include/dns/zone.h b/lib/dns/include/dns/zone.h +index 10ed86c..3449065 100644 +--- a/lib/dns/include/dns/zone.h ++++ b/lib/dns/include/dns/zone.h +@@ -165,6 +165,19 @@ dns_zone_create(dns_zone_t **zonep, isc_mem_t *mctx); + *\li #ISC_R_UNEXPECTED + */ + ++isc_result_t ++dns_zone_makedb(dns_zone_t *zone, dns_db_t **dbp); ++/*%< ++ * Creates a new empty database for the 'zone'. ++ * ++ * Requires: ++ *\li 'zone' to be a valid zone. ++ *\li 'dbp' to point to NULL pointer. ++ * ++ * Returns: ++ *\li dns_db_create() error codes. 
++ */ ++ + void + dns_zone_setclass(dns_zone_t *zone, dns_rdataclass_t rdclass); + /*%< +@@ -350,6 +363,32 @@ dns_zone_getmaxrecords(dns_zone_t *zone); + *\li uint32_t maxrecords. + */ + ++void ++dns_zone_setmaxrrperset(dns_zone_t *zone, uint32_t maxrrperset); ++/*%< ++ * Sets the maximum number of records per rrset permitted in a zone. ++ * 0 implies unlimited. ++ * ++ * Requires: ++ *\li 'zone' to be valid initialised zone. ++ * ++ * Returns: ++ *\li void ++ */ ++ ++void ++dns_zone_setmaxtypepername(dns_zone_t *zone, uint32_t maxtypepername); ++/*%< ++ * Sets the maximum number of resource record types per owner name ++ * permitted in a zone. 0 implies unlimited. ++ * ++ * Requires: ++ *\li 'zone' to be valid initialised zone. ++ * ++ * Returns: ++ *\li void ++ */ ++ + void + dns_zone_setmaxttl(dns_zone_t *zone, uint32_t maxttl); + /*%< +diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c +index bc0f8d8..c22e021 100644 +--- a/lib/dns/rbtdb.c ++++ b/lib/dns/rbtdb.c +@@ -450,6 +450,8 @@ struct dns_rbtdb { + rbtdb_serial_t current_serial; + rbtdb_serial_t least_serial; + rbtdb_serial_t next_serial; ++ uint32_t maxrrperset; ++ uint32_t maxtypepername; + rbtdb_version_t *current_version; + rbtdb_version_t *future_version; + rbtdb_versionlist_t open_versions; +@@ -913,6 +915,8 @@ prio_type(rbtdb_rdatatype_t type) { + case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa): + case dns_rdatatype_a: + case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_a): ++ case dns_rdatatype_mx: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_mx): + case dns_rdatatype_aaaa: + case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_aaaa): + case dns_rdatatype_nsec: +@@ -925,6 +929,22 @@ prio_type(rbtdb_rdatatype_t type) { + case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds): + case dns_rdatatype_cname: + case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname): ++ case dns_rdatatype_dname: ++ case 
RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname): ++ case dns_rdatatype_svcb: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_svcb): ++ case dns_rdatatype_https: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_https): ++ case dns_rdatatype_dnskey: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dnskey): ++ case dns_rdatatype_srv: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_srv): ++ case dns_rdatatype_txt: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_txt): ++ case dns_rdatatype_ptr: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ptr): ++ case dns_rdatatype_naptr: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_naptr): + return (true); + } + return (false); +@@ -6180,6 +6200,24 @@ update_recordsandxfrsize(bool add, rbtdb_version_t *rbtversion, + RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_write); + } + ++static bool ++overmaxtype(dns_rbtdb_t *rbtdb, uint32_t ntypes) { ++ if (rbtdb->maxtypepername == 0) { ++ return (false); ++ } ++ ++ return (ntypes >= rbtdb->maxtypepername); ++} ++ ++static bool ++prio_header(rdatasetheader_t *header) { ++ if (NEGATIVE(header) && prio_type(RBTDB_RDATATYPE_EXT(header->type))) { ++ return (true); ++ } ++ ++ return (prio_type(header->type)); ++} ++ + /* + * write lock on rbtnode must be held. 
+ */ +@@ -6191,7 +6229,7 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, + rbtdb_changed_t *changed = NULL; + rdatasetheader_t *topheader = NULL, *topheader_prev = NULL; + rdatasetheader_t *header = NULL, *sigheader = NULL; +- rdatasetheader_t *prioheader = NULL; ++ rdatasetheader_t *prioheader = NULL, *expireheader = NULL; + unsigned char *merged = NULL; + isc_result_t result; + bool header_nx; +@@ -6201,6 +6239,7 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, + rbtdb_rdatatype_t negtype, sigtype; + dns_trust_t trust; + int idx; ++ uint32_t ntypes = 0; + + /* + * Add an rdatasetheader_t to a node. +@@ -6276,6 +6315,7 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, + { + if (topheader->type == sigtype) { + sigheader = topheader; ++ break; + } + } + negtype = RBTDB_RDATATYPE_VALUE(covers, 0); +@@ -6338,7 +6378,13 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, + for (topheader = rbtnode->data; topheader != NULL; + topheader = topheader->next) + { +- if (prio_type(topheader->type)) { ++ if (IS_CACHE(rbtdb) && ACTIVE(topheader, now)) { ++ ++ntypes; ++ expireheader = topheader; ++ } else if (!IS_CACHE(rbtdb)) { ++ ++ntypes; ++ } ++ if (prio_header(topheader)) { + prioheader = topheader; + } + if (topheader->type == newheader->type || +@@ -6428,7 +6474,7 @@ find_header: + rbtdb->common.mctx, + rbtdb->common.rdclass, + (dns_rdatatype_t)header->type, flags, +- &merged); ++ rbtdb->maxrrperset, &merged); + } + if (result == ISC_R_SUCCESS) { + /* +@@ -6707,9 +6753,15 @@ find_header: + /* + * No rdatasets of the given type exist at the node. 
+ */ ++ if (!IS_CACHE(rbtdb) && overmaxtype(rbtdb, ntypes)) { ++ free_rdataset(rbtdb, rbtdb->common.mctx, ++ newheader); ++ return (DNS_R_TOOMANYRECORDS); ++ } ++ + newheader->down = NULL; + +- if (prio_type(newheader->type)) { ++ if (prio_header(newheader)) { + /* This is a priority type, prepend it */ + newheader->next = rbtnode->data; + rbtnode->data = newheader; +@@ -6722,6 +6774,31 @@ find_header: + newheader->next = rbtnode->data; + rbtnode->data = newheader; + } ++ ++ if (IS_CACHE(rbtdb) && overmaxtype(rbtdb, ntypes)) { ++ if (expireheader == NULL) { ++ expireheader = newheader; ++ } ++ if (NEGATIVE(newheader) && ++ !prio_header(newheader)) ++ { ++ /* ++ * Add the new non-priority negative ++ * header to the database only ++ * temporarily. ++ */ ++ expireheader = newheader; ++ } ++ ++ set_ttl(rbtdb, expireheader, 0); ++ mark_header_ancient(rbtdb, expireheader); ++ /* ++ * FIXME: In theory, we should mark the RRSIG ++ * and the header at the same time, but there is ++ * no direct link between those two header, so ++ * we would have to check the whole list again. 
++ */ ++ } + } + } + +@@ -6767,7 +6844,7 @@ delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + + static isc_result_t + addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, +- dns_rdataset_t *rdataset) { ++ uint32_t maxrrperset, dns_rdataset_t *rdataset) { + struct noqname *noqname; + isc_mem_t *mctx = rbtdb->common.mctx; + dns_name_t name; +@@ -6788,12 +6865,12 @@ addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, + noqname->negsig = NULL; + noqname->type = neg.type; + dns_name_dup(&name, mctx, &noqname->name); +- result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0); ++ result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0, maxrrperset); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + noqname->neg = r.base; +- result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0); ++ result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0, maxrrperset); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } +@@ -6812,7 +6889,7 @@ cleanup: + + static isc_result_t + addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, +- dns_rdataset_t *rdataset) { ++ uint32_t maxrrperset, dns_rdataset_t *rdataset) { + struct noqname *closest; + isc_mem_t *mctx = rbtdb->common.mctx; + dns_name_t name; +@@ -6833,12 +6910,12 @@ addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, + closest->negsig = NULL; + closest->type = neg.type; + dns_name_dup(&name, mctx, &closest->name); +- result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0); ++ result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0, maxrrperset); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + closest->neg = r.base; +- result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0); ++ result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0, maxrrperset); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } +@@ -6916,7 +6993,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, + } + + result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, +- &region, 
sizeof(rdatasetheader_t)); ++ &region, sizeof(rdatasetheader_t), ++ rbtdb->maxrrperset); + if (result != ISC_R_SUCCESS) { + return (result); + } +@@ -6974,7 +7052,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, + RDATASET_ATTR_SET(newheader, RDATASET_ATTR_OPTOUT); + } + if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) { +- result = addnoqname(rbtdb, newheader, rdataset); ++ result = addnoqname(rbtdb, newheader, ++ rbtdb->maxrrperset, rdataset); + if (result != ISC_R_SUCCESS) { + free_rdataset(rbtdb, rbtdb->common.mctx, + newheader); +@@ -6982,7 +7061,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, + } + } + if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) { +- result = addclosest(rbtdb, newheader, rdataset); ++ result = addclosest(rbtdb, newheader, ++ rbtdb->maxrrperset, rdataset); + if (result != ISC_R_SUCCESS) { + free_rdataset(rbtdb, rbtdb->common.mctx, + newheader); +@@ -7148,7 +7228,8 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, + nodefullname(db, node, nodename); + + result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, +- &region, sizeof(rdatasetheader_t)); ++ &region, sizeof(rdatasetheader_t), ++ 0); + if (result != ISC_R_SUCCESS) { + return (result); + } +@@ -7552,7 +7633,8 @@ loading_addrdataset(void *arg, const dns_name_t *name, + } + + result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, +- &region, sizeof(rdatasetheader_t)); ++ &region, sizeof(rdatasetheader_t), ++ rbtdb->maxrrperset); + if (result != ISC_R_SUCCESS) { + return (result); + } +@@ -8088,6 +8170,24 @@ setgluecachestats(dns_db_t *db, isc_stats_t *stats) { + return (ISC_R_SUCCESS); + } + ++static void ++setmaxrrperset(dns_db_t *db, uint32_t maxrrperset) { ++ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; ++ ++ REQUIRE(VALID_RBTDB(rbtdb)); ++ ++ rbtdb->maxrrperset = maxrrperset; ++} ++ ++static void ++setmaxtypepername(dns_db_t *db, uint32_t maxtypepername) { ++ dns_rbtdb_t 
*rbtdb = (dns_rbtdb_t *)db; ++ ++ REQUIRE(VALID_RBTDB(rbtdb)); ++ ++ rbtdb->maxtypepername = maxtypepername; ++} ++ + static dns_stats_t * + getrrsetstats(dns_db_t *db) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; +@@ -8209,7 +8309,9 @@ static dns_dbmethods_t zone_methods = { attach, + NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ +- setgluecachestats }; ++ setgluecachestats, ++ setmaxrrperset, ++ setmaxtypepername }; + + static dns_dbmethods_t cache_methods = { attach, + detach, +@@ -8259,7 +8361,9 @@ static dns_dbmethods_t cache_methods = { attach, + getservestalettl, + setservestalerefresh, + getservestalerefresh, +- NULL }; ++ NULL, ++ setmaxrrperset, ++ setmaxtypepername }; + + isc_result_t + dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, +diff --git a/lib/dns/rdataslab.c b/lib/dns/rdataslab.c +index 24fdaa8..5c30d44 100644 +--- a/lib/dns/rdataslab.c ++++ b/lib/dns/rdataslab.c +@@ -114,7 +114,8 @@ fillin_offsets(unsigned char *offsetbase, unsigned int *offsettable, + + isc_result_t + dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, +- isc_region_t *region, unsigned int reservelen) { ++ isc_region_t *region, unsigned int reservelen, ++ uint32_t maxrrperset) { + /* + * Use &removed as a sentinel pointer for duplicate + * rdata as rdata.data == NULL is valid. 
+@@ -156,6 +157,10 @@ dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, + return (ISC_R_SUCCESS); + } + ++ if (maxrrperset > 0 && nitems > maxrrperset) { ++ return (DNS_R_TOOMANYRECORDS); ++ } ++ + if (nitems > 0xffff) { + return (ISC_R_NOSPACE); + } +@@ -482,7 +487,8 @@ isc_result_t + dns_rdataslab_merge(unsigned char *oslab, unsigned char *nslab, + unsigned int reservelen, isc_mem_t *mctx, + dns_rdataclass_t rdclass, dns_rdatatype_t type, +- unsigned int flags, unsigned char **tslabp) { ++ unsigned int flags, uint32_t maxrrperset, ++ unsigned char **tslabp) { + unsigned char *ocurrent, *ostart, *ncurrent, *tstart, *tcurrent, *data; + unsigned int ocount, ncount, count, olength, tlength, tcount, length; + dns_rdata_t ordata = DNS_RDATA_INIT; +@@ -522,6 +528,10 @@ dns_rdataslab_merge(unsigned char *oslab, unsigned char *nslab, + #endif /* if DNS_RDATASET_FIXED */ + INSIST(ocount > 0 && ncount > 0); + ++ if (maxrrperset > 0 && ocount + ncount > maxrrperset) { ++ return (DNS_R_TOOMANYRECORDS); ++ } ++ + #if DNS_RDATASET_FIXED + oncount = ncount; + #endif /* if DNS_RDATASET_FIXED */ +diff --git a/lib/dns/sdb.c b/lib/dns/sdb.c +index 317eeb0..07d720e 100644 +--- a/lib/dns/sdb.c ++++ b/lib/dns/sdb.c +@@ -1269,20 +1269,33 @@ settask(dns_db_t *db, isc_task_t *task) { + } + + static dns_dbmethods_t sdb_methods = { +- attach, detach, +- beginload, endload, +- dump, currentversion, +- newversion, attachversion, +- closeversion, NULL, /* findnode */ +- NULL, /* find */ +- findzonecut, attachnode, +- detachnode, expirenode, +- printnode, createiterator, +- findrdataset, allrdatasets, +- addrdataset, subtractrdataset, +- deleterdataset, issecure, +- nodecount, ispersistent, +- overmem, settask, ++ attach, ++ detach, ++ beginload, ++ endload, ++ dump, ++ currentversion, ++ newversion, ++ attachversion, ++ closeversion, ++ NULL, /* findnode */ ++ NULL, /* find */ ++ findzonecut, ++ attachnode, ++ detachnode, ++ expirenode, ++ printnode, ++ createiterator, ++ 
findrdataset, ++ allrdatasets, ++ addrdataset, ++ subtractrdataset, ++ deleterdataset, ++ issecure, ++ nodecount, ++ ispersistent, ++ overmem, ++ settask, + getoriginnode, /* getoriginnode */ + NULL, /* transfernode */ + NULL, /* getnsec3parameters */ +@@ -1294,7 +1307,8 @@ static dns_dbmethods_t sdb_methods = { + NULL, /* getrrsetstats */ + NULL, /* rpz_attach */ + NULL, /* rpz_ready */ +- findnodeext, findext, ++ findnodeext, ++ findext, + NULL, /* setcachestats */ + NULL, /* hashsize */ + NULL, /* nodefullname */ +@@ -1304,6 +1318,8 @@ static dns_dbmethods_t sdb_methods = { + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ + NULL, /* setgluecachestats */ ++ NULL, /* setmaxrrperset */ ++ NULL /* setmaxtypepername */ + }; + + static isc_result_t +diff --git a/lib/dns/sdlz.c b/lib/dns/sdlz.c +index 7ab08f6..f9d123d 100644 +--- a/lib/dns/sdlz.c ++++ b/lib/dns/sdlz.c +@@ -1242,34 +1242,57 @@ getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) { + } + + static dns_dbmethods_t sdlzdb_methods = { +- attach, detach, beginload, +- endload, dump, currentversion, +- newversion, attachversion, closeversion, +- findnode, find, findzonecut, +- attachnode, detachnode, expirenode, +- printnode, createiterator, findrdataset, +- allrdatasets, addrdataset, subtractrdataset, +- deleterdataset, issecure, nodecount, +- ispersistent, overmem, settask, +- getoriginnode, NULL, /* transfernode */ +- NULL, /* getnsec3parameters */ +- NULL, /* findnsec3node */ +- NULL, /* setsigningtime */ +- NULL, /* getsigningtime */ +- NULL, /* resigned */ +- NULL, /* isdnssec */ +- NULL, /* getrrsetstats */ +- NULL, /* rpz_attach */ +- NULL, /* rpz_ready */ +- findnodeext, findext, NULL, /* setcachestats */ +- NULL, /* hashsize */ +- NULL, /* nodefullname */ +- NULL, /* getsize */ +- NULL, /* setservestalettl */ +- NULL, /* getservestalettl */ +- NULL, /* setservestalerefresh */ +- NULL, /* getservestalerefresh */ +- NULL, /* setgluecachestats */ ++ attach, ++ detach, ++ beginload, ++ 
endload, ++ dump, ++ currentversion, ++ newversion, ++ attachversion, ++ closeversion, ++ findnode, ++ find, ++ findzonecut, ++ attachnode, ++ detachnode, ++ expirenode, ++ printnode, ++ createiterator, ++ findrdataset, ++ allrdatasets, ++ addrdataset, ++ subtractrdataset, ++ deleterdataset, ++ issecure, ++ nodecount, ++ ispersistent, ++ overmem, ++ settask, ++ getoriginnode, ++ NULL, /* transfernode */ ++ NULL, /* getnsec3parameters */ ++ NULL, /* findnsec3node */ ++ NULL, /* setsigningtime */ ++ NULL, /* getsigningtime */ ++ NULL, /* resigned */ ++ NULL, /* isdnssec */ ++ NULL, /* getrrsetstats */ ++ NULL, /* rpz_attach */ ++ NULL, /* rpz_ready */ ++ findnodeext, ++ findext, ++ NULL, /* setcachestats */ ++ NULL, /* hashsize */ ++ NULL, /* nodefullname */ ++ NULL, /* getsize */ ++ NULL, /* setservestalettl */ ++ NULL, /* getservestalettl */ ++ NULL, /* setservestalerefresh */ ++ NULL, /* getservestalerefresh */ ++ NULL, /* setgluecachestats */ ++ NULL, /* setmaxrrperset */ ++ NULL /* setmaxtypepername */ + }; + + /* +diff --git a/lib/dns/view.c b/lib/dns/view.c +index 49c9aee..231041e 100644 +--- a/lib/dns/view.c ++++ b/lib/dns/view.c +@@ -892,6 +892,9 @@ dns_view_setcache(dns_view_t *view, dns_cache_t *cache, bool shared) { + dns_cache_attach(cache, &view->cache); + dns_cache_attachdb(cache, &view->cachedb); + INSIST(DNS_DB_VALID(view->cachedb)); ++ ++ dns_cache_setmaxrrperset(view->cache, view->maxrrperset); ++ dns_cache_setmaxtypepername(view->cache, view->maxtypepername); + } + + bool +@@ -2759,3 +2762,21 @@ dns_view_sfd_find(dns_view_t *view, const dns_name_t *name, + dns_name_copy(dns_rootname, foundname); + } + } ++ ++void ++dns_view_setmaxrrperset(dns_view_t *view, uint32_t value) { ++ REQUIRE(DNS_VIEW_VALID(view)); ++ view->maxrrperset = value; ++ if (view->cache != NULL) { ++ dns_cache_setmaxrrperset(view->cache, value); ++ } ++} ++ ++void ++dns_view_setmaxtypepername(dns_view_t *view, uint32_t value) { ++ REQUIRE(DNS_VIEW_VALID(view)); ++ 
view->maxtypepername = value; ++ if (view->cache != NULL) { ++ dns_cache_setmaxtypepername(view->cache, value); ++ } ++} +diff --git a/lib/dns/xfrin.c b/lib/dns/xfrin.c +index 1aa982a..e5f1e0b 100644 +--- a/lib/dns/xfrin.c ++++ b/lib/dns/xfrin.c +@@ -211,8 +211,6 @@ xfrin_create(isc_mem_t *mctx, dns_zone_t *zone, dns_db_t *db, isc_nm_t *netmgr, + static isc_result_t + axfr_init(dns_xfrin_ctx_t *xfr); + static isc_result_t +-axfr_makedb(dns_xfrin_ctx_t *xfr, dns_db_t **dbp); +-static isc_result_t + axfr_putdata(dns_xfrin_ctx_t *xfr, dns_diffop_t op, dns_name_t *name, + dns_ttl_t ttl, dns_rdata_t *rdata); + static isc_result_t +@@ -288,7 +286,11 @@ axfr_init(dns_xfrin_ctx_t *xfr) { + dns_db_detach(&xfr->db); + } + +- CHECK(axfr_makedb(xfr, &xfr->db)); ++ CHECK(dns_zone_makedb(xfr->zone, &xfr->db)); ++ ++ dns_zone_rpz_enable_db(xfr->zone, xfr->db); ++ dns_zone_catz_enable_db(xfr->zone, xfr->db); ++ + dns_rdatacallbacks_init(&xfr->axfr); + CHECK(dns_db_beginload(xfr->db, &xfr->axfr)); + result = ISC_R_SUCCESS; +@@ -296,22 +298,6 @@ failure: + return (result); + } + +-static isc_result_t +-axfr_makedb(dns_xfrin_ctx_t *xfr, dns_db_t **dbp) { +- isc_result_t result; +- +- result = dns_db_create(xfr->mctx, /* XXX */ +- "rbt", /* XXX guess */ +- &xfr->name, dns_dbtype_zone, xfr->rdclass, 0, +- NULL, /* XXX guess */ +- dbp); +- if (result == ISC_R_SUCCESS) { +- dns_zone_rpz_enable_db(xfr->zone, *dbp); +- dns_zone_catz_enable_db(xfr->zone, *dbp); +- } +- return (result); +-} +- + static isc_result_t + axfr_putdata(dns_xfrin_ctx_t *xfr, dns_diffop_t op, dns_name_t *name, + dns_ttl_t ttl, dns_rdata_t *rdata) { +diff --git a/lib/dns/zone.c b/lib/dns/zone.c +index 3b95136..f14a166 100644 +--- a/lib/dns/zone.c ++++ b/lib/dns/zone.c +@@ -309,6 +309,8 @@ struct dns_zone { + uint32_t minretry; + + uint32_t maxrecords; ++ uint32_t maxrrperset; ++ uint32_t maxtypepername; + + isc_sockaddr_t *primaries; + dns_name_t **primarykeynames; +@@ -2327,31 +2329,13 @@ zone_load(dns_zone_t *zone, 
unsigned int flags, bool locked) { + dns_zone_logc(zone, DNS_LOGCATEGORY_ZONELOAD, ISC_LOG_DEBUG(1), + "starting load"); + +- result = dns_db_create(zone->mctx, zone->db_argv[0], &zone->origin, +- (zone->type == dns_zone_stub) ? dns_dbtype_stub +- : dns_dbtype_zone, +- zone->rdclass, zone->db_argc - 1, +- zone->db_argv + 1, &db); +- ++ result = dns_zone_makedb(zone, &db); + if (result != ISC_R_SUCCESS) { + dns_zone_logc(zone, DNS_LOGCATEGORY_ZONELOAD, ISC_LOG_ERROR, + "loading zone: creating database: %s", + isc_result_totext(result)); + goto cleanup; + } +- dns_db_settask(db, zone->task); +- +- if (zone->type == dns_zone_primary || +- zone->type == dns_zone_secondary || zone->type == dns_zone_mirror) +- { +- result = dns_db_setgluecachestats(db, zone->gluecachestats); +- if (result == ISC_R_NOTIMPLEMENTED) { +- result = ISC_R_SUCCESS; +- } +- if (result != ISC_R_SUCCESS) { +- goto cleanup; +- } +- } + + if (!dns_db_ispersistent(db)) { + if (zone->masterfile != NULL || zone->stream != NULL) { +@@ -10063,6 +10047,7 @@ cleanup: + } + + dns_diff_clear(&_sig_diff); ++ dns_diff_clear(&post_diff); + + for (i = 0; i < nkeys; i++) { + dst_key_free(&zone_keys[i]); +@@ -12332,6 +12317,26 @@ dns_zone_setmaxrecords(dns_zone_t *zone, uint32_t val) { + zone->maxrecords = val; + } + ++void ++dns_zone_setmaxrrperset(dns_zone_t *zone, uint32_t val) { ++ REQUIRE(DNS_ZONE_VALID(zone)); ++ ++ zone->maxrrperset = val; ++ if (zone->db != NULL) { ++ dns_db_setmaxrrperset(zone->db, val); ++ } ++} ++ ++void ++dns_zone_setmaxtypepername(dns_zone_t *zone, uint32_t val) { ++ REQUIRE(DNS_ZONE_VALID(zone)); ++ ++ zone->maxtypepername = val; ++ if (zone->db != NULL) { ++ dns_db_setmaxtypepername(zone->db, val); ++ } ++} ++ + static bool + notify_isqueued(dns_zone_t *zone, unsigned int flags, dns_name_t *name, + isc_sockaddr_t *addr, dns_tsigkey_t *key, +@@ -14799,6 +14804,9 @@ ns_query(dns_zone_t *zone, dns_rdataset_t *soardataset, dns_stub_t *stub) { + goto cleanup; + } + 
dns_db_settask(stub->db, zone->task); ++ dns_db_setmaxrrperset(stub->db, zone->maxrrperset); ++ dns_db_setmaxtypepername(stub->db, ++ zone->maxtypepername); + } + + result = dns_db_newversion(stub->db, &stub->version); +@@ -17516,6 +17524,8 @@ zone_replacedb(dns_zone_t *zone, dns_db_t *db, bool dump) { + } + zone_attachdb(zone, db); + dns_db_settask(zone->db, zone->task); ++ dns_db_setmaxrrperset(zone->db, zone->maxrrperset); ++ dns_db_setmaxtypepername(zone->db, zone->maxtypepername); + DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_LOADED | DNS_ZONEFLG_NEEDNOTIFY); + return (ISC_R_SUCCESS); + +@@ -22045,7 +22055,11 @@ failure: + * Something went wrong; try again in ten minutes or + * after a key refresh interval, whichever is shorter. + */ +- dnssec_log(zone, ISC_LOG_DEBUG(3), ++ int loglevel = ISC_LOG_DEBUG(3); ++ if (result != DNS_R_NOTLOADED) { ++ loglevel = ISC_LOG_ERROR; ++ } ++ dnssec_log(zone, loglevel, + "zone_rekey failure: %s (retry in %u seconds)", + isc_result_totext(result), + ISC_MIN(zone->refreshkeyinterval, 600)); +@@ -23706,3 +23720,45 @@ zmgr_tlsctx_attach(dns_zonemgr_t *zmgr, isc_tlsctx_cache_t **ptlsctx_cache) { + + RWUNLOCK(&zmgr->tlsctx_cache_rwlock, isc_rwlocktype_read); + } ++ ++isc_result_t ++dns_zone_makedb(dns_zone_t *zone, dns_db_t **dbp) { ++ REQUIRE(DNS_ZONE_VALID(zone)); ++ REQUIRE(dbp != NULL && *dbp == NULL); ++ ++ dns_db_t *db = NULL; ++ ++ isc_result_t result = dns_db_create( ++ zone->mctx, zone->db_argv[0], &zone->origin, ++ (zone->type == dns_zone_stub) ? 
dns_dbtype_stub ++ : dns_dbtype_zone, ++ zone->rdclass, zone->db_argc - 1, zone->db_argv + 1, &db); ++ if (result != ISC_R_SUCCESS) { ++ return (result); ++ } ++ ++ switch (zone->type) { ++ case dns_zone_primary: ++ case dns_zone_secondary: ++ case dns_zone_mirror: ++ result = dns_db_setgluecachestats(db, zone->gluecachestats); ++ if (result == ISC_R_NOTIMPLEMENTED) { ++ result = ISC_R_SUCCESS; ++ } ++ if (result != ISC_R_SUCCESS) { ++ dns_db_detach(&db); ++ return (result); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ dns_db_settask(db, zone->task); ++ dns_db_setmaxrrperset(db, zone->maxrrperset); ++ dns_db_setmaxtypepername(db, zone->maxtypepername); ++ ++ *dbp = db; ++ ++ return (ISC_R_SUCCESS); ++} +diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c +index 5a8ccb2..7938bcb 100644 +--- a/lib/isccfg/namedconf.c ++++ b/lib/isccfg/namedconf.c +@@ -2300,6 +2300,12 @@ static cfg_clausedef_t zone_clauses[] = { + { "max-records", &cfg_type_uint32, + CFG_ZONE_PRIMARY | CFG_ZONE_SECONDARY | CFG_ZONE_MIRROR | + CFG_ZONE_STUB | CFG_ZONE_STATICSTUB | CFG_ZONE_REDIRECT }, ++ { "max-records-per-type", &cfg_type_uint32, ++ CFG_ZONE_PRIMARY | CFG_ZONE_SECONDARY | CFG_ZONE_MIRROR | ++ CFG_ZONE_STUB | CFG_ZONE_STATICSTUB | CFG_ZONE_REDIRECT }, ++ { "max-types-per-name", &cfg_type_uint32, ++ CFG_ZONE_PRIMARY | CFG_ZONE_SECONDARY | CFG_ZONE_MIRROR | ++ CFG_ZONE_STUB | CFG_ZONE_STATICSTUB | CFG_ZONE_REDIRECT }, + { "max-refresh-time", &cfg_type_uint32, + CFG_ZONE_SECONDARY | CFG_ZONE_MIRROR | CFG_ZONE_STUB }, + { "max-retry-time", &cfg_type_uint32, +diff --git a/lib/ns/update.c b/lib/ns/update.c +index 983ca84..5d72686 100644 +--- a/lib/ns/update.c ++++ b/lib/ns/update.c +@@ -3302,9 +3302,18 @@ update_action(isc_task_t *task, isc_event_t *event) { + dns_diff_clear(&ctx.add_diff); + goto failure; + } +- CHECK(update_one_rr(db, ver, &diff, +- DNS_DIFFOP_ADD, +- name, ttl, &rdata)); ++ result = update_one_rr( ++ db, ver, &diff, DNS_DIFFOP_ADD, ++ name, ttl, &rdata); ++ if 
(result != ISC_R_SUCCESS) { ++ update_log(client, zone, ++ LOGLEVEL_PROTOCOL, ++ "adding an RR " ++ "failed: %s", ++ isc_result_totext( ++ result)); ++ goto failure; ++ } + } + } + } else if (update_class == dns_rdataclass_any) { +-- +2.33.0 + diff --git a/backport-CVE-2024-1975.patch b/backport-CVE-2024-1975.patch new file mode 100644 index 0000000000000000000000000000000000000000..551de7e30e285ed2898d5021c48bc551702ddf7c --- /dev/null +++ b/backport-CVE-2024-1975.patch @@ -0,0 +1,352 @@ +From bef3d2cca3552100bbe44790c8c1a4f5bef06798 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Petr=20=C5=A0pa=C4=8Dek?= +Date: Thu, 16 May 2024 12:10:41 +0200 +Subject: [PATCH] Remove support for SIG(0) message verification + +Conflict:Case adaptation +Reference:https://downloads.isc.org/isc/bind9/9.18.28/patches/0003-CVE-2024-1975.patch + +--- + bin/tests/system/tsiggss/authsock.pl | 5 ++ + bin/tests/system/tsiggss/tests.sh | 12 ++-- + bin/tests/system/upforwd/tests.sh | 9 ++- + doc/arm/general.rst | 6 +- + doc/arm/intro-security.inc.rst | 2 +- + doc/arm/reference.rst | 4 +- + doc/arm/security.inc.rst | 4 +- + doc/arm/sig0.inc.rst | 16 +---- + lib/dns/message.c | 99 ++-------------------------- + lib/ns/client.c | 7 ++ + 10 files changed, 40 insertions(+), 124 deletions(-) + +diff --git a/bin/tests/system/tsiggss/authsock.pl b/bin/tests/system/tsiggss/authsock.pl +index 4c76bf8..972252a 100644 +--- a/bin/tests/system/tsiggss/authsock.pl ++++ b/bin/tests/system/tsiggss/authsock.pl +@@ -33,6 +33,10 @@ if (!defined($path)) { + exit(1); + } + ++# Enable output autoflush so that it's not lost when the parent sends TERM. 
++select STDOUT; ++$| = 1; ++ + unlink($path); + my $server = IO::Socket::UNIX->new(Local => $path, Type => SOCK_STREAM, Listen => 8) or + die "unable to create socket $path"; +@@ -50,6 +54,7 @@ if ($timeout != 0) { + } + + while (my $client = $server->accept()) { ++ printf("accept()\n"); + $client->recv(my $buf, 8, 0); + my ($version, $req_len) = unpack('N N', $buf); + +diff --git a/bin/tests/system/tsiggss/tests.sh b/bin/tests/system/tsiggss/tests.sh +index c37f32e..004ad83 100644 +--- a/bin/tests/system/tsiggss/tests.sh ++++ b/bin/tests/system/tsiggss/tests.sh +@@ -117,7 +117,7 @@ status=$((status + ret)) + + echo_i "testing external update policy (CNAME) with auth sock ($n)" + ret=0 +-$PERL ./authsock.pl --type=CNAME --path=ns1/auth.sock --pidfile=authsock.pid --timeout=120 >/dev/null 2>&1 & ++$PERL ./authsock.pl --type=CNAME --path=ns1/auth.sock --pidfile=authsock.pid --timeout=120 >authsock.log 2>&1 & + sleep 1 + test_update $n testcname.example.nil. CNAME "86400 CNAME testdenied.example.nil" "testdenied" || ret=1 + n=$((n + 1)) +@@ -131,17 +131,19 @@ n=$((n + 1)) + if [ "$ret" -ne 0 ]; then echo_i "failed"; fi + status=$((status + ret)) + +-echo_i "testing external policy with SIG(0) key ($n)" ++echo_i "testing external policy with unsupported SIG(0) key ($n)" + ret=0 +-$NSUPDATE -k ns1/Kkey.example.nil.*.private <<END >/dev/null 2>&1 || ret=1 ++$NSUPDATE -d -k ns1/Kkey.example.nil.*.private <<END >nsupdate.out${n} 2>&1 || true ++debug + server 10.53.0.1 ${PORT} + zone example.nil + update add fred.example.nil 120 cname foo.bar. + send + END + output=$($DIG $DIGOPTS +short cname fred.example.nil.) 
+-[ -n "$output" ] || ret=1 +-[ $ret -eq 0 ] || echo_i "failed" ++# update must have failed - SIG(0) signer is not supported ++[ -n "$output" ] && ret=1 ++grep -F "signer=key.example.nil" authsock.log >/dev/null && ret=1 + n=$((n + 1)) + if [ "$ret" -ne 0 ]; then echo_i "failed"; fi + status=$((status + ret)) +diff --git a/bin/tests/system/upforwd/tests.sh b/bin/tests/system/upforwd/tests.sh +index 518eac6..d231d0f 100644 +--- a/bin/tests/system/upforwd/tests.sh ++++ b/bin/tests/system/upforwd/tests.sh +@@ -229,10 +229,12 @@ fi + n=$((n + 1)) + + if test -f keyname; then +- echo_i "checking update forwarding to with sig0 ($n)" ++ echo_i "checking update forwarding to with sig0 (expected to fail) ($n)" + ret=0 + keyname=$(cat keyname) +- $NSUPDATE -k $keyname.private -- - <nsupdate.out.$n 2>&1 && ret=1 + $DIG -p ${PORT} unsigned.example2 A @10.53.0.1 >dig.out.ns1.test$n +- grep "status: NOERROR" dig.out.ns1.test$n >/dev/null || ret=1 ++ grep "status: NOERROR" dig.out.ns1.test$n >/dev/null && ret=1 + if [ $ret != 0 ]; then echo_i "failed"; fi + status=$((status + ret)) + n=$((n + 1)) +diff --git a/doc/arm/general.rst b/doc/arm/general.rst +index 5b65f6a..35f74b3 100644 +--- a/doc/arm/general.rst ++++ b/doc/arm/general.rst +@@ -379,10 +379,8 @@ Notes + .. [#rfc1035_2] CLASS ANY queries are not supported. This is considered a + feature. + +-.. [#rfc2931] When receiving a query signed with a SIG(0), the server is +- only able to verify the signature if it has the key in its local +- authoritative data; it cannot do recursion or validation to +- retrieve unknown keys. ++.. [#rfc2931] Support for SIG(0) message verification was removed ++ as part of the mitigation of CVE-2024-1975. + + .. [#rfc2874] Compliance is with loading and serving of A6 records only. + A6 records were moved to the experimental category by :rfc:`3363`. 
+diff --git a/doc/arm/intro-security.inc.rst b/doc/arm/intro-security.inc.rst +index 87db970..996e910 100644 +--- a/doc/arm/intro-security.inc.rst ++++ b/doc/arm/intro-security.inc.rst +@@ -47,7 +47,7 @@ or ports come preconfigured with local (loopback address) security preconfigured + If ``rndc`` is being invoked from a remote host, further configuration is required. + The ``nsupdate`` tool uses **Dynamic DNS (DDNS)** features and allows users to dynamically + change the contents of the zone file(s). ``nsupdate`` access and security may be controlled +-using ``named.conf`` :ref:`statements or using TSIG or SIG(0) cryptographic methods `. ++using ``named.conf`` :ref:`statements or via the TSIG cryptographic method `. + Clearly, if the remote hosts used for either ``rndc`` or DDNS lie within a network entirely + under the user's control, the security threat may be regarded as non-existent. Any implementation requirements, + therefore, depend on the site's security policy. +diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst +index 29e246b..157ab30 100644 +--- a/doc/arm/reference.rst ++++ b/doc/arm/reference.rst +@@ -7417,7 +7417,7 @@ the zone's filename, unless :any:`inline-signing` is enabled. + updates are allowed. It specifies a set of rules, in which each rule + either grants or denies permission for one or more names in the zone to + be updated by one or more identities. Identity is determined by the key +- that signed the update request, using either TSIG or SIG(0). In most ++ that signed the update request, using TSIG. In most + cases, :any:`update-policy` rules only apply to key-based identities. There + is no way to specify update permissions based on the client source address. + +@@ -7474,7 +7474,7 @@ the zone's filename, unless :any:`inline-signing` is enabled. + field. Details for each rule type are described below. + + The ``identity`` field must be set to a fully qualified domain name. 
In +- most cases, this represents the name of the TSIG or SIG(0) key that ++ most cases, this represents the name of the TSIG key that + must be used to sign the update request. If the specified name is a + wildcard, it is subject to DNS wildcard expansion, and the rule may + apply to multiple identities. When a TKEY exchange has been used to +diff --git a/doc/arm/security.inc.rst b/doc/arm/security.inc.rst +index 878fa37..8fc65d3 100644 +--- a/doc/arm/security.inc.rst ++++ b/doc/arm/security.inc.rst +@@ -85,7 +85,7 @@ Limiting access to the server by outside parties can help prevent + spoofing and denial of service (DoS) attacks against the server. + + ACLs match clients on the basis of up to three characteristics: 1) The +-client's IP address; 2) the TSIG or SIG(0) key that was used to sign the ++client's IP address; 2) the TSIG key that was used to sign the + request, if any; and 3) an address prefix encoded in an EDNS + Client-Subnet option, if any. + +@@ -126,7 +126,7 @@ and no queries at all from the networks specified in ``bogusnets``. + + In addition to network addresses and prefixes, which are matched against + the source address of the DNS request, ACLs may include ``key`` +-elements, which specify the name of a TSIG or SIG(0) key. ++elements, which specify the name of a TSIG key. + + When BIND 9 is built with GeoIP support, ACLs can also be used for + geographic access restrictions. This is done by specifying an ACL +diff --git a/doc/arm/sig0.inc.rst b/doc/arm/sig0.inc.rst +index 048dbea..6e6fc32 100644 +--- a/doc/arm/sig0.inc.rst ++++ b/doc/arm/sig0.inc.rst +@@ -12,17 +12,5 @@ + SIG(0) + ------ + +-BIND partially supports DNSSEC SIG(0) transaction signatures as +-specified in :rfc:`2535` and :rfc:`2931`. SIG(0) uses public/private keys to +-authenticate messages. Access control is performed in the same manner as with +-TSIG keys; privileges can be granted or denied in ACL directives based +-on the key name. 
+- +-When a SIG(0) signed message is received, it is only verified if +-the key is known and trusted by the server. The server does not attempt +-to recursively fetch or validate the key. +- +-SIG(0) signing of multiple-message TCP streams is not supported. +- +-The only tool shipped with BIND 9 that generates SIG(0) signed messages +-is :iscman:`nsupdate`. ++Support for DNSSEC SIG(0) transaction signatures has been removed. ++This is a countermeasure for CVE-2024-1975. +diff --git a/lib/dns/message.c b/lib/dns/message.c +index 8654e92..a379125 100644 +--- a/lib/dns/message.c ++++ b/lib/dns/message.c +@@ -3288,111 +3288,24 @@ dns_message_dumpsig(dns_message_t *msg, char *txt1) { + + isc_result_t + dns_message_checksig(dns_message_t *msg, dns_view_t *view) { +- isc_buffer_t b, msgb; ++ isc_buffer_t msgb; + + REQUIRE(DNS_MESSAGE_VALID(msg)); + +- if (msg->tsigkey == NULL && msg->tsig == NULL && msg->sig0 == NULL) { ++ if (msg->tsigkey == NULL && msg->tsig == NULL) { + return (ISC_R_SUCCESS); + } + + INSIST(msg->saved.base != NULL); + isc_buffer_init(&msgb, msg->saved.base, msg->saved.length); + isc_buffer_add(&msgb, msg->saved.length); +- if (msg->tsigkey != NULL || msg->tsig != NULL) { + #ifdef SKAN_MSG_DEBUG +- dns_message_dumpsig(msg, "dns_message_checksig#1"); ++ dns_message_dumpsig(msg, "dns_message_checksig#1"); + #endif /* ifdef SKAN_MSG_DEBUG */ +- if (view != NULL) { +- return (dns_view_checksig(view, &msgb, msg)); +- } else { +- return (dns_tsig_verify(&msgb, msg, NULL, NULL)); +- } ++ if (view != NULL) { ++ return (dns_view_checksig(view, &msgb, msg)); + } else { +- dns_rdata_t rdata = DNS_RDATA_INIT; +- dns_rdata_sig_t sig; +- dns_rdataset_t keyset; +- isc_result_t result; +- +- result = dns_rdataset_first(msg->sig0); +- INSIST(result == ISC_R_SUCCESS); +- dns_rdataset_current(msg->sig0, &rdata); +- +- /* +- * This can occur when the message is a dynamic update, since +- * the rdata length checking is relaxed. 
This should not +- * happen in a well-formed message, since the SIG(0) is only +- * looked for in the additional section, and the dynamic update +- * meta-records are in the prerequisite and update sections. +- */ +- if (rdata.length == 0) { +- return (ISC_R_UNEXPECTEDEND); +- } +- +- result = dns_rdata_tostruct(&rdata, &sig, NULL); +- if (result != ISC_R_SUCCESS) { +- return (result); +- } +- +- dns_rdataset_init(&keyset); +- if (view == NULL) { +- result = DNS_R_KEYUNAUTHORIZED; +- goto freesig; +- } +- result = dns_view_simplefind(view, &sig.signer, +- dns_rdatatype_key /* SIG(0) */, 0, +- 0, false, &keyset, NULL); +- +- if (result != ISC_R_SUCCESS) { +- /* XXXBEW Should possibly create a fetch here */ +- result = DNS_R_KEYUNAUTHORIZED; +- goto freesig; +- } else if (keyset.trust < dns_trust_secure) { +- /* XXXBEW Should call a validator here */ +- result = DNS_R_KEYUNAUTHORIZED; +- goto freesig; +- } +- result = dns_rdataset_first(&keyset); +- INSIST(result == ISC_R_SUCCESS); +- for (; result == ISC_R_SUCCESS; +- result = dns_rdataset_next(&keyset)) +- { +- dst_key_t *key = NULL; +- +- dns_rdata_reset(&rdata); +- dns_rdataset_current(&keyset, &rdata); +- isc_buffer_init(&b, rdata.data, rdata.length); +- isc_buffer_add(&b, rdata.length); +- +- result = dst_key_fromdns(&sig.signer, rdata.rdclass, &b, +- view->mctx, &key); +- if (result != ISC_R_SUCCESS) { +- continue; +- } +- if (dst_key_alg(key) != sig.algorithm || +- dst_key_id(key) != sig.keyid || +- !(dst_key_proto(key) == DNS_KEYPROTO_DNSSEC || +- dst_key_proto(key) == DNS_KEYPROTO_ANY)) +- { +- dst_key_free(&key); +- continue; +- } +- result = dns_dnssec_verifymessage(&msgb, msg, key); +- dst_key_free(&key); +- if (result == ISC_R_SUCCESS) { +- break; +- } +- } +- if (result == ISC_R_NOMORE) { +- result = DNS_R_KEYUNAUTHORIZED; +- } +- +- freesig: +- if (dns_rdataset_isassociated(&keyset)) { +- dns_rdataset_disassociate(&keyset); +- } +- dns_rdata_freestruct(&sig); +- return (result); ++ return 
(dns_tsig_verify(&msgb, msg, NULL, NULL)); + } + } + +diff --git a/lib/ns/client.c b/lib/ns/client.c +index 8981222..5d2ad0b 100644 +--- a/lib/ns/client.c ++++ b/lib/ns/client.c +@@ -2168,6 +2168,13 @@ ns__client_request(isc_nmhandle_t *handle, isc_result_t eresult, + ns_client_log(client, DNS_LOGCATEGORY_SECURITY, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "request is signed by a nonauthoritative key"); ++ } else if (result == DNS_R_NOTVERIFIEDYET && ++ client->message->sig0 != NULL) ++ { ++ ns_client_log(client, DNS_LOGCATEGORY_SECURITY, ++ NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), ++ "request has a SIG(0) signature but its support " ++ "was removed (CVE-2024-1975)"); + } else { + char tsigrcode[64]; + isc_buffer_t b; +-- +2.33.0 + diff --git a/backport-CVE-2024-4076.patch b/backport-CVE-2024-4076.patch new file mode 100644 index 0000000000000000000000000000000000000000..02b3bb8437e6a935ec5282710fb5066fb53501ff --- /dev/null +++ b/backport-CVE-2024-4076.patch @@ -0,0 +1,34 @@ +From 9cfd20cd90fab4c97fe91f68555b7a2e05b808e8 Mon Sep 17 00:00:00 2001 +From: Mark Andrews +Date: Tue, 16 Jan 2024 14:25:27 +1100 +Subject: [PATCH] Clear qctx->zversion + +Clear qctx->zversion when clearing qctx->zrdataset et al in +lib/ns/query.c:qctx_freedata. The uncleared pointer could lead to +an assertion failure if zone data needed to be re-saved which could +happen with stale data support enabled. 
+ +(cherry picked from commit 179fb3532ab8d4898ab070b2db54c0ce872ef709) + +Conflict:NA +Reference:https://downloads.isc.org/isc/bind9/9.18.28/patches/0004-CVE-2024-4076.patch + +--- + lib/ns/query.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/ns/query.c b/lib/ns/query.c +index 40e1232..7884514 100644 +--- a/lib/ns/query.c ++++ b/lib/ns/query.c +@@ -5323,6 +5323,7 @@ qctx_freedata(query_ctx_t *qctx) { + ns_client_releasename(qctx->client, &qctx->zfname); + dns_db_detachnode(qctx->zdb, &qctx->znode); + dns_db_detach(&qctx->zdb); ++ qctx->zversion = NULL; + } + + if (qctx->event != NULL && !qctx->client->nodetach) { +-- +2.33.0 + diff --git a/backport-CVE-2025-40778-01.patch b/backport-CVE-2025-40778-01.patch new file mode 100644 index 0000000000000000000000000000000000000000..da2ff0820afaf2cb02251c19b0f0f05ffaf60c8c --- /dev/null +++ b/backport-CVE-2025-40778-01.patch @@ -0,0 +1,33 @@ +From 025d61bacd0f57f994a631654aff7a933d89a547 Mon Sep 17 00:00:00 2001 +From: Mark Andrews +Date: Thu, 10 Jul 2025 09:37:36 +1000 +Subject: [PATCH] Tighten restrictions on caching NS RRsets in authority + section + +To prevent certain spoofing attacks, a new check has been added +to the existing rules for whether NS data can be cached: the owner +name of the NS RRset must be an ancestor of the name being queried. 
+ +(cherry picked from commit fa153f791f9324bf84abf8d259e11c0531fe6e25) +--- + lib/dns/resolver.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c +index dc9c5b1c87..eb5d671c8f 100644 +--- a/lib/dns/resolver.c ++++ b/lib/dns/resolver.c +@@ -9247,7 +9247,9 @@ rctx_authority_positive(respctx_t *rctx) { + dns_message_currentname(rctx->query->rmessage, + DNS_SECTION_AUTHORITY, &name); + +- if (!name_external(name, dns_rdatatype_ns, fctx)) { ++ if (!name_external(name, dns_rdatatype_ns, fctx) && ++ dns_name_issubdomain(fctx->name, name)) ++ { + dns_rdataset_t *rdataset = NULL; + + /* +-- +2.33.0 + diff --git a/backport-CVE-2025-40778-02.patch b/backport-CVE-2025-40778-02.patch new file mode 100644 index 0000000000000000000000000000000000000000..acb8092092e89bc4ea1bfc32babfc064bb10957e --- /dev/null +++ b/backport-CVE-2025-40778-02.patch @@ -0,0 +1,68 @@ +From cd17dfe696cdf9b8ef23fbc8738de7c79f957846 Mon Sep 17 00:00:00 2001 +From: Mark Andrews +Date: Thu, 14 Aug 2025 14:35:46 +1000 +Subject: [PATCH] Further restrict addresses that are cached when processing + referrals + +Use the owner name of the NS record as the bailiwick apex name +when determining which additional records to cache, rather than +the name of the delegating zone (or a parent thereof). + +(cherry picked from commit a41054e9e606a61f1b3c8bc0c54e2f1059347165) +--- + lib/dns/resolver.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c +index eb5d671c8f..4364f0ac19 100644 +--- a/lib/dns/resolver.c ++++ b/lib/dns/resolver.c +@@ -7141,7 +7141,8 @@ mark_related(dns_name_t *name, dns_rdataset_t *rdataset, bool external, + * locally served zone.
+ */ + static inline bool +-name_external(const dns_name_t *name, dns_rdatatype_t type, fetchctx_t *fctx) { ++name_external(const dns_name_t *name, dns_rdatatype_t type, respctx_t *rctx) { ++ fetchctx_t *fctx = rctx->fctx; + isc_result_t result; + dns_forwarders_t *forwarders = NULL; + dns_fixedname_t fixed, zfixed; +@@ -7154,7 +7155,7 @@ name_external(const dns_name_t *name, dns_rdatatype_t type, fetchctx_t *fctx) { + dns_namereln_t rel; + + apex = (ISDUALSTACK(fctx->addrinfo) || !ISFORWARDER(fctx->addrinfo)) +- ? fctx->domain ++ ? rctx->ns_name != NULL ? rctx->ns_name : fctx->domain + : fctx->fwdname; + + /* +@@ -7263,7 +7264,7 @@ check_section(void *arg, const dns_name_t *addname, dns_rdatatype_t type, + result = dns_message_findname(rctx->query->rmessage, section, addname, + dns_rdatatype_any, 0, &name, NULL); + if (result == ISC_R_SUCCESS) { +- external = name_external(name, type, fctx); ++ external = name_external(name, type, rctx); + if (type == dns_rdatatype_a) { + for (rdataset = ISC_LIST_HEAD(name->list); + rdataset != NULL; +@@ -8923,7 +8924,7 @@ rctx_answer_scan(respctx_t *rctx) { + /* + * Don't accept DNAME from parent namespace. 
+ */ +- if (name_external(name, dns_rdatatype_dname, fctx)) { ++ if (name_external(name, dns_rdatatype_dname, rctx)) { + continue; + } + +@@ -9247,7 +9248,7 @@ rctx_authority_positive(respctx_t *rctx) { + dns_message_currentname(rctx->query->rmessage, + DNS_SECTION_AUTHORITY, &name); + +- if (!name_external(name, dns_rdatatype_ns, fctx) && ++ if (!name_external(name, dns_rdatatype_ns, rctx) && + dns_name_issubdomain(fctx->name, name)) + { + dns_rdataset_t *rdataset = NULL; +-- +2.33.0 + diff --git a/backport-CVE-2025-40778-03.patch b/backport-CVE-2025-40778-03.patch new file mode 100644 index 0000000000000000000000000000000000000000..f49c4fdf4c470af71683a81a0fdfe75b52b069fc --- /dev/null +++ b/backport-CVE-2025-40778-03.patch @@ -0,0 +1,706 @@ +From 4c6d03b0bb2ffbafcde8e8a5bc0e49908b978a72 Mon Sep 17 00:00:00 2001 +From: Mark Andrews +Date: Wed, 13 Aug 2025 13:56:01 +1000 +Subject: [PATCH] Retry lookups with unsigned DNAME over TCP + +To prevent spoofed unsigned DNAME responses being accepted retry +response with unsigned DNAMEs over TCP if the response is not TSIG +signed or there isn't a good DNS CLIENT COOKIE. + +To prevent test failures, this required adding TCP support to the +ans3 and ans4 servers in the chain system test. 
+ +(cherry picked from commit 2e40705c06831988106335ed77db3cf924d431f6) +--- + bin/tests/system/chain/ans3/ans.pl | 131 ----------------- + bin/tests/system/chain/ans3/ans.py | 217 +++++++++++++++++++++++++++++ + bin/tests/system/chain/ans4/ans.py | 57 ++++++-- + lib/dns/include/dns/message.h | 8 ++ + lib/dns/message.c | 14 +- + lib/dns/resolver.c | 99 ++++++++++--- + 6 files changed, 368 insertions(+), 158 deletions(-) + delete mode 100644 bin/tests/system/chain/ans3/ans.pl + create mode 100644 bin/tests/system/chain/ans3/ans.py + +diff --git a/bin/tests/system/chain/ans3/ans.pl b/bin/tests/system/chain/ans3/ans.pl +deleted file mode 100644 +index 271b2a4..0000000 +--- a/bin/tests/system/chain/ans3/ans.pl ++++ /dev/null +@@ -1,131 +0,0 @@ +-#!/usr/bin/env perl +- +-# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +-# +-# SPDX-License-Identifier: MPL-2.0 +-# +-# This Source Code Form is subject to the terms of the Mozilla Public +-# License, v. 2.0. If a copy of the MPL was not distributed with this +-# file, you can obtain one at https://mozilla.org/MPL/2.0/. +-# +-# See the COPYRIGHT file distributed with this work for additional +-# information regarding copyright ownership. 
+- +-use strict; +-use warnings; +- +-use IO::File; +-use Getopt::Long; +-use Net::DNS::Nameserver; +- +-my $pidf = new IO::File "ans.pid", "w" or die "cannot open pid file: $!"; +-print $pidf "$$\n" or die "cannot write pid file: $!"; +-$pidf->close or die "cannot close pid file: $!"; +-sub rmpid { unlink "ans.pid"; exit 1; }; +- +-$SIG{INT} = \&rmpid; +-$SIG{TERM} = \&rmpid; +- +-my $localaddr = "10.53.0.3"; +- +-my $localport = int($ENV{'PORT'}); +-if (!$localport) { $localport = 5300; } +- +-my $verbose = 0; +-my $ttl = 60; +-my $zone = "example.broken"; +-my $nsname = "ns3.$zone"; +-my $synth = "synth-then-dname.$zone"; +-my $synth2 = "synth2-then-dname.$zone"; +- +-sub reply_handler { +- my ($qname, $qclass, $qtype, $peerhost, $query, $conn) = @_; +- my ($rcode, @ans, @auth, @add); +- +- print ("request: $qname/$qtype\n"); +- STDOUT->flush(); +- +- if ($qname eq "example.broken") { +- if ($qtype eq "SOA") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass SOA . . 0 0 0 0 0"); +- push @ans, $rr; +- } elsif ($qtype eq "NS") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass NS $nsname"); +- push @ans, $rr; +- $rr = new Net::DNS::RR("$nsname $ttl $qclass A $localaddr"); +- push @add, $rr; +- } +- $rcode = "NOERROR"; +- } elsif ($qname eq "cname-to-$synth2") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass CNAME name.$synth2"); +- push @ans, $rr; +- $rr = new Net::DNS::RR("name.$synth2 $ttl $qclass CNAME name"); +- push @ans, $rr; +- $rr = new Net::DNS::RR("$synth2 $ttl $qclass DNAME ."); +- push @ans, $rr; +- $rcode = "NOERROR"; +- } elsif ($qname eq "$synth" || $qname eq "$synth2") { +- if ($qtype eq "DNAME") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass DNAME ."); +- push @ans, $rr; +- } +- $rcode = "NOERROR"; +- } elsif ($qname eq "name.$synth") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass CNAME name."); +- push @ans, $rr; +- $rr = new Net::DNS::RR("$synth $ttl $qclass DNAME ."); +- push @ans, $rr; +- $rcode = "NOERROR"; +- } elsif ($qname 
eq "name.$synth2") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass CNAME name."); +- push @ans, $rr; +- $rr = new Net::DNS::RR("$synth2 $ttl $qclass DNAME ."); +- push @ans, $rr; +- $rcode = "NOERROR"; +- # The following three code branches referring to the "example.dname" +- # zone are necessary for the resolver variant of the CVE-2021-25215 +- # regression test to work. A named instance cannot be used for +- # serving the DNAME records below as a version of BIND vulnerable to +- # CVE-2021-25215 would crash while answering the queries asked by +- # the tested resolver. +- } elsif ($qname eq "ns3.example.dname") { +- if ($qtype eq "A") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass A 10.53.0.3"); +- push @ans, $rr; +- } +- if ($qtype eq "AAAA") { +- my $rr = new Net::DNS::RR("example.dname. $ttl $qclass SOA . . 0 0 0 0 $ttl"); +- push @auth, $rr; +- } +- $rcode = "NOERROR"; +- } elsif ($qname eq "self.example.self.example.dname") { +- my $rr = new Net::DNS::RR("self.example.dname. $ttl $qclass DNAME dname."); +- push @ans, $rr; +- $rr = new Net::DNS::RR("$qname $ttl $qclass CNAME self.example.dname."); +- push @ans, $rr; +- $rcode = "NOERROR"; +- } elsif ($qname eq "self.example.dname") { +- if ($qtype eq "DNAME") { +- my $rr = new Net::DNS::RR("$qname $ttl $qclass DNAME dname."); +- push @ans, $rr; +- } +- $rcode = "NOERROR"; +- } else { +- $rcode = "REFUSED"; +- } +- return ($rcode, \@ans, \@auth, \@add, { aa => 1 }); +-} +- +-GetOptions( +- 'port=i' => \$localport, +- 'verbose!' => \$verbose, +-); +- +-my $ns = Net::DNS::Nameserver->new( +- LocalAddr => $localaddr, +- LocalPort => $localport, +- ReplyHandler => \&reply_handler, +- Verbose => $verbose, +-); +- +-$ns->main_loop; +diff --git a/bin/tests/system/chain/ans3/ans.py b/bin/tests/system/chain/ans3/ans.py +new file mode 100644 +index 0000000..0a031c1 +--- /dev/null ++++ b/bin/tests/system/chain/ans3/ans.py +@@ -0,0 +1,217 @@ ++# Copyright (C) Internet Systems Consortium, Inc. 
("ISC") ++# ++# SPDX-License-Identifier: MPL-2.0 ++# ++# This Source Code Form is subject to the terms of the Mozilla Public ++# License, v. 2.0. If a copy of the MPL was not distributed with this ++# file, you can obtain one at https://mozilla.org/MPL/2.0/. ++# ++# See the COPYRIGHT file distributed with this work for additional ++# information regarding copyright ownership. ++ ++############################################################################ ++# ans.py: See README.anspy for details. ++############################################################################ ++ ++from __future__ import print_function ++import os ++import sys ++import signal ++import socket ++import select ++from datetime import datetime, timedelta ++import functools ++ ++import dns, dns.message, dns.query ++from dns.rdatatype import * ++from dns.rdataclass import * ++from dns.rcode import * ++from dns.name import * ++ ++ ++############################################################################ ++# Respond to a DNS query. ++############################################################################ ++def create_response(msg): ++ ttl = 60 ++ zone = "example.broken." ++ nsname = f"ns3.{zone}" ++ synth = f"synth-then-dname.{zone}" ++ synth2 = f"synth2-then-dname.{zone}" ++ ++ m = dns.message.from_wire(msg) ++ qname = m.question[0].name.to_text() ++ ++ # prepare the response and convert to wire format ++ r = dns.message.make_response(m) ++ ++ # get qtype ++ rrtype = m.question[0].rdtype ++ qtype = dns.rdatatype.to_text(rrtype) ++ print(f"request: {qname}/{qtype}") ++ ++ rcode = "NOERROR" ++ if qname == zone: ++ if qtype == "SOA": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, SOA, ". . 
0 0 0 0 0")) ++ elif qtype == "NS": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, NS, nsname)) ++ r.additional.append(dns.rrset.from_text(nsname, ttl, IN, A, ip4)) ++ elif qname == f"cname-to-{synth2}": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, CNAME, f"name.{synth2}")) ++ r.answer.append(dns.rrset.from_text(f"name.{synth2}", ttl, IN, CNAME, "name.")) ++ r.answer.append(dns.rrset.from_text(synth2, ttl, IN, DNAME, ".")) ++ elif qname == f"{synth}" or qname == f"{synth2}": ++ if qtype == "DNAME": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, DNAME, ".")) ++ elif qname == f"name.{synth}": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, CNAME, "name.")) ++ r.answer.append(dns.rrset.from_text(synth, ttl, IN, DNAME, ".")) ++ elif qname == f"name.{synth2}": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, CNAME, "name.")) ++ r.answer.append(dns.rrset.from_text(synth2, ttl, IN, DNAME, ".")) ++ elif qname == "ns3.example.dname.": ++ # This and the next two code branches referring to the "example.dname" ++ # zone are necessary for the resolver variant of the CVE-2021-25215 ++ # regression test to work. A named instance cannot be used for ++ # serving the DNAME records below as a version of BIND vulnerable to ++ # CVE-2021-25215 would crash while answering the queries asked by ++ # the tested resolver. ++ if qtype == "A": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, A, ip4)) ++ elif qtype == "AAAA": ++ r.authority.append( ++ dns.rrset.from_text("example.dname.", ttl, IN, SOA, ". . 
0 0 0 0 0") ++ ) ++ elif qname == "self.example.self..example.dname.": ++ r.answer.append( ++ dns.rrset.from_text("self.example.dname.", ttl, IN, DNAME, "dname.") ++ ) ++ r.answer.append( ++ dns.rrset.from_text(qname, ttl, IN, CNAME, "self.example.dname.") ++ ) ++ elif qname == "self.example.dname.": ++ if qtype == "DNAME": ++ r.answer.append(dns.rrset.from_text(qname, ttl, IN, DNAME, "dname.")) ++ else: ++ rcode = "REFUSED" ++ ++ r.flags |= dns.flags.AA ++ r.use_edns() ++ return r.to_wire() ++ ++ ++def sigterm(signum, frame): ++ print("Shutting down now...") ++ os.remove("ans.pid") ++ running = False ++ sys.exit(0) ++ ++ ++############################################################################ ++# Main ++# ++# Set up responder and control channel, open the pid file, and start ++# the main loop, listening for queries on the query channel or commands ++# on the control channel and acting on them. ++############################################################################ ++ip4 = "10.53.0.3" ++ip6 = "fd92:7065:b8e:ffff::3" ++ ++try: ++ port = int(os.environ["PORT"]) ++except: ++ port = 5300 ++ ++query4_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) ++query4_udp.bind((ip4, port)) ++ ++query4_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ++query4_tcp.bind((ip4, port)) ++query4_tcp.listen(1) ++query4_tcp.settimeout(1) ++ ++havev6 = True ++try: ++ query6_udp = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) ++ try: ++ query6_udp.bind((ip6, port)) ++ except: ++ query6_udp.close() ++ havev6 = False ++ ++ query6_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ++ try: ++ query6_tcp.bind((ip4, port)) ++ query6_tcp.listen(1) ++ query6_tcp.settimeout(1) ++ except: ++ query6_tcp.close() ++ havev6 = False ++except: ++ havev6 = False ++ ++signal.signal(signal.SIGTERM, sigterm) ++ ++f = open("ans.pid", "w") ++pid = os.getpid() ++print(pid, file=f) ++f.close() ++ ++running = True ++ ++print("Listening on %s port %d" % (ip4, port)) ++if havev6: ++ 
print("Listening on %s port %d" % (ip6, port)) ++print("Ctrl-c to quit") ++ ++if havev6: ++ input = [query4_udp, query4_tcp, query6_udp, query6_tcp] ++else: ++ input = [query4_udp, query4_tcp] ++ ++while running: ++ try: ++ inputready, outputready, exceptready = select.select(input, [], []) ++ except select.error as e: ++ break ++ except socket.error as e: ++ break ++ except KeyboardInterrupt: ++ break ++ ++ for s in inputready: ++ if s == query4_udp or s == query6_udp: ++ print("Query received on %s" % (ip4 if s == query4_udp else ip6)) ++ # Handle incoming queries ++ msg = s.recvfrom(65535) ++ rsp = create_response(msg[0]) ++ if rsp: ++ s.sendto(rsp, msg[1]) ++ elif s == query4_tcp or s == query6_tcp: ++ try: ++ conn, _ = s.accept() ++ if s == query4_tcp or s == query6_tcp: ++ print( ++ "TCP Query received on %s" % (ip4 if s == query4_tcp else ip6), ++ end=" ", ++ ) ++ # get TCP message length ++ msg = conn.recv(2) ++ if len(msg) != 2: ++ print("couldn't read TCP message length") ++ continue ++ length = struct.unpack(">H", msg[:2])[0] ++ msg = conn.recv(length) ++ if len(msg) != length: ++ print("couldn't read TCP message") ++ continue ++ rsp = create_response(msg) ++ if rsp: ++ conn.send(struct.pack(">H", len(rsp))) ++ conn.send(rsp) ++ conn.close() ++ except socket.error as e: ++ print("error: %s" % str(e)) ++ if not running: ++ break +diff --git a/bin/tests/system/chain/ans4/ans.py b/bin/tests/system/chain/ans4/ans.py +index 839067f..66f0193 100755 +--- a/bin/tests/system/chain/ans4/ans.py ++++ b/bin/tests/system/chain/ans4/ans.py +@@ -316,16 +316,30 @@ try: + except: + ctrlport = 5300 + +-query4_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +-query4_socket.bind((ip4, port)) ++query4_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) ++query4_udp.bind((ip4, port)) ++ ++query4_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ++query4_tcp.bind((ip4, port)) ++query4_tcp.listen(1) ++query4_tcp.settimeout(1) + + havev6 = True + try: +- 
query6_socket = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) ++ query6_udp = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) ++ try: ++ query6_udp.bind((ip6, port)) ++ except: ++ query6_udp.close() ++ havev6 = False ++ ++ query6_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: +- query6_socket.bind((ip6, port)) ++ query6_tcp.bind((ip4, port)) ++ query6_tcp.listen(1) ++ query6_tcp.settimeout(1) + except: +- query6_socket.close() ++ query6_tcp.close() + havev6 = False + except: + havev6 = False +@@ -350,9 +364,9 @@ print("Control channel on %s port %d" % (ip4, ctrlport)) + print("Ctrl-c to quit") + + if havev6: +- input = [query4_socket, query6_socket, ctrl_socket] ++ input = [query4_udp, query4_tcp, query6_udp, query6_tcp, ctrl_socket] + else: +- input = [query4_socket, ctrl_socket] ++ input = [query4_udp, query4_tcp, ctrl_socket] + + while running: + try: +@@ -375,12 +389,37 @@ while running: + break + ctl_channel(msg) + conn.close() +- if s == query4_socket or s == query6_socket: +- print("Query received on %s" % (ip4 if s == query4_socket else ip6)) ++ elif s == query4_udp or s == query6_udp: ++ print("Query received on %s" % (ip4 if s == query4_udp else ip6)) + # Handle incoming queries + msg = s.recvfrom(65535) + rsp = create_response(msg[0]) + if rsp: + s.sendto(rsp, msg[1]) ++ elif s == query4_tcp or s == query6_tcp: ++ try: ++ conn, _ = s.accept() ++ if s == query4_tcp or s == query6_tcp: ++ print( ++ "TCP Query received on %s" % (ip4 if s == query4_tcp else ip6), ++ end=" ", ++ ) ++ # get TCP message length ++ msg = conn.recv(2) ++ if len(msg) != 2: ++ print("couldn't read TCP message length") ++ continue ++ length = struct.unpack(">H", msg[:2])[0] ++ msg = conn.recv(length) ++ if len(msg) != length: ++ print("couldn't read TCP message") ++ continue ++ rsp = create_response(msg) ++ if rsp: ++ conn.send(struct.pack(">H", len(rsp))) ++ conn.send(rsp) ++ conn.close() ++ except socket.error as e: ++ print("error: %s" % str(e)) + if not 
running: + break +diff --git a/lib/dns/include/dns/message.h b/lib/dns/include/dns/message.h +index f15884a..c2efc19 100644 +--- a/lib/dns/include/dns/message.h ++++ b/lib/dns/include/dns/message.h +@@ -283,6 +283,7 @@ struct dns_message { + unsigned int tkey : 1; + unsigned int rdclass_set : 1; + unsigned int fuzzing : 1; ++ unsigned int has_dname : 1; + + unsigned int opt_reserved; + unsigned int sig_reserved; +@@ -1526,4 +1527,11 @@ dns_message_response_minttl(dns_message_t *msg, dns_ttl_t *pttl); + * \li 'pttl != NULL'. + */ + ++bool ++dns_message_hasdname(dns_message_t *msg); ++/*%< ++ * Return whether a DNAME was detected in the ANSWER section of a QUERY ++ * message when it was parsed. ++ */ ++ + ISC_LANG_ENDDECLS +diff --git a/lib/dns/message.c b/lib/dns/message.c +index a379125..523ecf8 100644 +--- a/lib/dns/message.c ++++ b/lib/dns/message.c +@@ -428,6 +428,7 @@ msginit(dns_message_t *m) { + m->cc_bad = 0; + m->tkey = 0; + m->rdclass_set = 0; ++ m->has_dname = 0; + m->querytsig = NULL; + m->indent.string = "\t"; + m->indent.count = 0; +@@ -1708,6 +1709,11 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, + */ + msg->tsigname->attributes |= DNS_NAMEATTR_NOCOMPRESS; + free_name = false; ++ } else if (rdtype == dns_rdatatype_dname && ++ sectionid == DNS_SECTION_ANSWER && ++ msg->opcode == dns_opcode_query) ++ { ++ msg->has_dname = 1; + } + rdataset = NULL; + +@@ -4865,5 +4871,11 @@ dns_message_response_minttl(dns_message_t *msg, dns_ttl_t *pttl) { + return (message_authority_soa_min(msg, pttl)); + } + +- return (ISC_R_SUCCESS); ++ return ISC_R_SUCCESS; ++} ++ ++bool ++dns_message_hasdname(dns_message_t *msg) { ++ REQUIRE(DNS_MESSAGE_VALID(msg)); ++ return msg->has_dname; + } +diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c +index 21e36f0..b156691 100644 +--- a/lib/dns/resolver.c ++++ b/lib/dns/resolver.c +@@ -795,6 +795,7 @@ typedef struct respctx { + bool get_nameservers; /* get a new NS rrset at + * zone cut? 
*/ + bool resend; /* resend this query? */ ++ bool secured; /* message was signed or had a valid cookie */ + bool nextitem; /* invalid response; keep + * listening for the correct one */ + bool truncated; /* response was truncated */ +@@ -7656,7 +7657,48 @@ betterreferral(respctx_t *rctx) { + } + } + } +- return (false); ++ return false; ++} ++ ++static bool ++rctx_need_tcpretry(respctx_t *rctx) { ++ resquery_t *query = rctx->query; ++ if ((rctx->retryopts & DNS_FETCHOPT_TCP) != 0) { ++ /* TCP is already in the retry flags */ ++ return false; ++ } ++ ++ /* ++ * If the message was secured, no need to continue. ++ */ ++ if (rctx->secured) { ++ return false; ++ } ++ ++ /* ++ * Currently the only extra reason why we might need to ++ * retry a UDP response over TCP is a DNAME in the message. ++ */ ++ if (dns_message_hasdname(query->rmessage)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++static isc_result_t ++rctx_tcpretry(respctx_t *rctx) { ++ /* ++ * Do we need to retry a UDP response over TCP? ++ */ ++ if (rctx_need_tcpretry(rctx)) { ++ rctx->retryopts |= DNS_FETCHOPT_TCP; ++ rctx->resend = true; ++ rctx_done(rctx, ISC_R_SUCCESS); ++ return ISC_R_COMPLETE; ++ } ++ ++ return ISC_R_SUCCESS; + } + + /* +@@ -7848,6 +7890,17 @@ resquery_response(isc_result_t eresult, isc_region_t *region, void *arg) { + return; + } + ++ /* ++ * Remember whether this message was signed or had a ++ * valid client cookie; if not, we may need to retry over ++ * TCP later. ++ */ ++ if (query->rmessage->cc_ok || query->rmessage->tsig != NULL || ++ query->rmessage->sig0 != NULL) ++ { ++ rctx.secured = true; ++ } ++ + /* + * The dispatcher should ensure we only get responses with QR + * set. +@@ -7859,10 +7912,7 @@ resquery_response(isc_result_t eresult, isc_region_t *region, void *arg) { + * TCP. This may be a misconfigured anycast server or an attempt + * to send a spoofed response. Skip if we have a valid tsig. 
+ */ +- if (dns_message_gettsig(query->rmessage, NULL) == NULL && +- !query->rmessage->cc_ok && !query->rmessage->cc_bad && +- (rctx.retryopts & DNS_FETCHOPT_TCP) == 0) +- { ++ if (!rctx.secured && (rctx.retryopts & DNS_FETCHOPT_TCP) == 0) { + unsigned char cookie[COOKIE_BUFFER_SIZE]; + if (dns_adb_getcookie(fctx->adb, query->addrinfo, cookie, + sizeof(cookie)) > CLIENT_COOKIE_SIZE) +@@ -7874,8 +7924,7 @@ resquery_response(isc_result_t eresult, isc_region_t *region, void *arg) { + isc_log_write( + dns_lctx, DNS_LOGCATEGORY_RESOLVER, + DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO, +- "missing expected cookie " +- "from %s", ++ "missing expected cookie from %s", + addrbuf); + } + rctx.retryopts |= DNS_FETCHOPT_TCP; +@@ -7885,6 +7934,17 @@ resquery_response(isc_result_t eresult, isc_region_t *region, void *arg) { + } + } + ++ /* ++ * Check whether we need to retry over TCP for some other reason. ++ */ ++ result = rctx_tcpretry(&rctx); ++ if (result == ISC_R_COMPLETE) { ++ return; ++ } ++ ++ /* ++ * Check for EDNS issues. ++ */ + rctx_edns(&rctx); + + /* +@@ -8665,8 +8725,8 @@ rctx_answer_positive(respctx_t *rctx) { + } + + /* +- * Cache records in the authority section, if +- * there are any suitable for caching. ++ * Cache records in the authority section, if there are ++ * any suitable for caching. + */ + rctx_authority_positive(rctx); + +@@ -9039,14 +9099,14 @@ rctx_answer_dname(respctx_t *rctx) { + + /* + * rctx_authority_positive(): +- * Examine the records in the authority section (if there are any) for a +- * positive answer. We expect the names for all rdatasets in this +- * section to be subdomains of the domain being queried; any that are +- * not are skipped. We expect to find only *one* owner name; any names +- * after the first one processed are ignored. We expect to find only +- * rdatasets of type NS, RRSIG, or SIG; all others are ignored. 
Whatever +- * remains can be cached at trust level authauthority or additional +- * (depending on whether the AA bit was set on the answer). ++ * If a positive answer was received over TCP or secured with a cookie ++ * or TSIG, examine the authority section. We expect names for all ++ * rdatasets in this section to be subdomains of the domain being queried; ++ * any that are not are skipped. We expect to find only *one* owner name; ++ * any names after the first one processed are ignored. We expect to find ++ * only rdatasets of type NS; all others are ignored. Whatever remains can ++ * be cached at trust level authauthority or additional (depending on ++ * whether the AA bit was set on the answer). + */ + static void + rctx_authority_positive(respctx_t *rctx) { +@@ -9054,6 +9114,11 @@ rctx_authority_positive(respctx_t *rctx) { + bool done = false; + isc_result_t result; + ++ /* If it's spoofable, don't cache it. */ ++ if (!rctx->secured && (rctx->query->options & DNS_FETCHOPT_TCP) == 0) { ++ return; ++ } ++ + result = dns_message_firstname(rctx->query->rmessage, + DNS_SECTION_AUTHORITY); + while (!done && result == ISC_R_SUCCESS) { +-- +2.33.0 + diff --git a/backport-CVE-2025-40780.patch b/backport-CVE-2025-40780.patch new file mode 100644 index 0000000000000000000000000000000000000000..cb4ea5948baf9e445b4a2186aef2a08b2fc3de32 --- /dev/null +++ b/backport-CVE-2025-40780.patch @@ -0,0 +1,329 @@ +From 8330b49fb90bfeae14b47b7983e9459cc2bbaffe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= +Date: Tue, 19 Aug 2025 19:22:18 +0200 +Subject: [PATCH] Use cryptographically-secure pseudo-random generator + everywhere + +It was discovered in an upcoming academic paper that a xoshiro128** +internal state can be recovered by an external 3rd party allowing to +predict UDP ports and DNS IDs in the outgoing queries. This could lead +to an attacker spoofing the DNS answers with great efficiency and +poisoning the DNS cache. 
+ +Change the internal random generator to system CSPRNG with buffering to +avoid excessive syscalls. + +Thanks Omer Ben Simhon and Amit Klein of Hebrew University of Jerusalem +for responsibly reporting this to us. Very cool research! + +(cherry picked from commit cffcab9d5f3e709002f331b72498fcc229786ae2) +--- + lib/isc/include/isc/random.h | 2 +- + lib/isc/random.c | 225 ++++++++++++++--------------------- + tests/isc/random_test.c | 4 +- + 3 files changed, 96 insertions(+), 135 deletions(-) + +diff --git a/lib/isc/include/isc/random.h b/lib/isc/include/isc/random.h +index 1e30d0c..fd55343 100644 +--- a/lib/isc/include/isc/random.h ++++ b/lib/isc/include/isc/random.h +@@ -20,7 +20,7 @@ + #include + + /*! \file isc/random.h +- * \brief Implements wrapper around a non-cryptographically secure ++ * \brief Implements wrapper around a cryptographically secure + * pseudo-random number generator. + * + */ +diff --git a/lib/isc/random.c b/lib/isc/random.c +index 7eead66..6f37f5d 100644 +--- a/lib/isc/random.c ++++ b/lib/isc/random.c +@@ -31,176 +31,135 @@ + */ + + #include +-#include +-#include +-#include ++#include + +-#include ++#include + #include +-#include + #include +-#include + #include + + #include "entropy_private.h" + +-/* +- * The specific implementation for PRNG is included as a C file +- * that has to provide a static variable named seed, and a function +- * uint32_t next(void) that provides next random number. +- * +- * The implementation must be thread-safe. +- */ +- +-/* +- * Two contestants have been considered: the xoroshiro family of the +- * functions by Villa&Blackman, and PCG by O'Neill. After +- * consideration, the xoshiro128starstar function has been chosen as +- * the uint32_t random number provider because it is very fast and has +- * good enough properties for our usage pattern. 
+- */ +- +-/* +- * Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) +- * +- * To the extent possible under law, the author has dedicated all +- * copyright and related and neighboring rights to this software to the +- * public domain worldwide. This software is distributed without any +- * warranty. +- * +- * See . +- */ ++#define ISC_RANDOM_BUFSIZE (ISC_OS_CACHELINE_SIZE / sizeof(uint32_t)) + +-/* +- * This is xoshiro128** 1.0, our 32-bit all-purpose, rock-solid generator. +- * It has excellent (sub-ns) speed, a state size (128 bits) that is large +- * enough for mild parallelism, and it passes all tests we are aware of. +- * +- * For generating just single-precision (i.e., 32-bit) floating-point +- * numbers, xoshiro128+ is even faster. +- * +- * The state must be seeded so that it is not everywhere zero. +- */ +-static thread_local uint32_t seed[4] = { 0 }; ++thread_local static uint32_t isc__random_pool[ISC_RANDOM_BUFSIZE]; ++thread_local static size_t isc__random_pos = ISC_RANDOM_BUFSIZE; + + static uint32_t +-rotl(const uint32_t x, int k) { +- return ((x << k) | (x >> (32 - k))); +-} +- +-static uint32_t +-next(void) { +- uint32_t result_starstar, t; +- +- result_starstar = rotl(seed[0] * 5, 7) * 9; +- t = seed[1] << 9; +- +- seed[2] ^= seed[0]; +- seed[3] ^= seed[1]; +- seed[1] ^= seed[2]; +- seed[0] ^= seed[3]; +- +- seed[2] ^= t; +- +- seed[3] = rotl(seed[3], 11); +- +- return (result_starstar); +-} +- +-static thread_local isc_once_t isc_random_once = ISC_ONCE_INIT; +- +-static void +-isc_random_initialize(void) { +- int useed[4] = { 0, 0, 0, 1 }; ++random_u32(void) { + #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* +- * Set a constant seed to help in problem reproduction should fuzzing +- * find a crash or a hang. The seed array must be non-zero else +- * xoshiro128starstar will generate an infinite series of zeroes. ++ * A fixed stream of numbers helps with problem reproduction when ++ * fuzzing. 
The first result needs to be non-zero as expected by ++ * random_test.c (it starts with ISC_RANDOM_BUFSIZE, see above). + */ +-#else /* if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */ +- isc_entropy_get(useed, sizeof(useed)); ++ return (uint32_t)(isc__random_pos++); + #endif /* if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */ +- memmove(seed, useed, sizeof(seed)); ++ ++ if (isc__random_pos == ISC_RANDOM_BUFSIZE) { ++ isc_entropy_get(isc__random_pool, sizeof(isc__random_pool)); ++ isc__random_pos = 0; ++ } ++ ++ return isc__random_pool[isc__random_pos++]; + } + + uint8_t + isc_random8(void) { +- RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) == +- ISC_R_SUCCESS); +- return (next() & 0xff); ++ return (uint8_t)random_u32(); + } + + uint16_t + isc_random16(void) { +- RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) == +- ISC_R_SUCCESS); +- return (next() & 0xffff); ++ return (uint16_t)random_u32(); + } + + uint32_t + isc_random32(void) { +- RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) == +- ISC_R_SUCCESS); +- return (next()); ++ return random_u32(); + } + + void + isc_random_buf(void *buf, size_t buflen) { +- int i; +- uint32_t r; +- +- REQUIRE(buf != NULL); +- REQUIRE(buflen > 0); +- +- RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) == +- ISC_R_SUCCESS); ++ REQUIRE(buflen == 0 || buf != NULL); + +- for (i = 0; i + sizeof(r) <= buflen; i += sizeof(r)) { +- r = next(); +- memmove((uint8_t *)buf + i, &r, sizeof(r)); ++ if (buf == NULL || buflen == 0) { ++ return; + } +- r = next(); +- memmove((uint8_t *)buf + i, &r, buflen % sizeof(r)); +- return; ++ ++ isc_entropy_get(buf, buflen); + } + + uint32_t +-isc_random_uniform(uint32_t upper_bound) { +- /* Copy of arc4random_uniform from OpenBSD */ +- uint32_t r, min; +- +- RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) == +- ISC_R_SUCCESS); +- +- if (upper_bound < 2) { +- return (0); +- } +- +-#if (ULONG_MAX > 0xffffffffUL) +- min 
= 0x100000000UL % upper_bound; +-#else /* if (ULONG_MAX > 0xffffffffUL) */ +- /* Calculate (2**32 % upper_bound) avoiding 64-bit math */ +- if (upper_bound > 0x80000000) { +- min = 1 + ~upper_bound; /* 2**32 - upper_bound */ +- } else { +- /* (2**32 - (x * 2)) % x == 2**32 % x when x <= 2**31 */ +- min = ((0xffffffff - (upper_bound * 2)) + 1) % upper_bound; +- } +-#endif /* if (ULONG_MAX > 0xffffffffUL) */ +- ++isc_random_uniform(uint32_t limit) { + /* +- * This could theoretically loop forever but each retry has +- * p > 0.5 (worst case, usually far better) of selecting a +- * number inside the range we need, so it should rarely need +- * to re-roll. ++ * Daniel Lemire's nearly-divisionless unbiased bounded random numbers. ++ * ++ * https://lemire.me/blog/?p=17551 ++ * ++ * The raw random number generator `next()` returns a 32-bit value. ++ * We do a 64-bit multiply `next() * limit` and treat the product as a ++ * 32.32 fixed-point value less than the limit. Our result will be the ++ * integer part (upper 32 bits), and we will use the fraction part ++ * (lower 32 bits) to determine whether or not we need to resample. + */ +- for (;;) { +- r = next(); +- if (r >= min) { +- break; ++ uint64_t num = (uint64_t)random_u32() * (uint64_t)limit; ++ /* ++ * In the fast path, we avoid doing a division in most cases by ++ * comparing the fraction part of `num` with the limit, which is ++ * a slight over-estimate for the exact resample threshold. ++ */ ++ if ((uint32_t)(num) < limit) { ++ /* ++ * We are in the slow path where we re-do the approximate test ++ * more accurately. The exact threshold for the resample loop ++ * is the remainder after dividing the raw RNG limit `1 << 32` ++ * by the caller's limit. 
We use a trick to calculate it ++ * within 32 bits: ++ * ++ * (1 << 32) % limit ++ * == ((1 << 32) - limit) % limit ++ * == (uint32_t)(-limit) % limit ++ * ++ * This division is safe: we know that `limit` is strictly ++ * greater than zero because of the slow-path test above. ++ */ ++ uint32_t residue = (uint32_t)(-limit) % limit; ++ /* ++ * Unless we get one of `N = (1 << 32) - residue` valid ++ * values, we reject the sample. This `N` is a multiple of ++ * `limit`, so our results will be unbiased; and `N` is the ++ * largest multiple that fits in 32 bits, so rejections are as ++ * rare as possible. ++ * ++ * There are `limit` possible values for the integer part of ++ * our fixed-point number. Each one corresponds to `N/limit` ++ * or `N/limit + 1` possible fraction parts. For our result to ++ * be unbiased, every possible integer part must have the same ++ * number of possible valid fraction parts. So, when we get ++ * the superfluous value in the `N/limit + 1` cases, we need ++ * to reject and resample. ++ * ++ * Because of the multiplication, the possible values in the ++ * fraction part are equally spaced by `limit`, with varying ++ * gaps at each end of the fraction's 32-bit range. We will ++ * choose a range of size `N` (a multiple of `limit`) into ++ * which valid fraction values must fall, with the rest of the ++ * 32-bit range covered by the `residue`. Lemire's paper says ++ * that exactly `N/limit` possible values spaced apart by ++ * `limit` will fit into our size `N` valid range, regardless ++ * of the size of the end gaps, the phase alignment of the ++ * values, or the position of the range. ++ * ++ * So, when a fraction value falls in the `residue` outside ++ * our valid range, it is superfluous, and we resample. ++ */ ++ while ((uint32_t)(num) < residue) { ++ num = (uint64_t)random_u32() * (uint64_t)limit; + } + } +- +- return (r % upper_bound); ++ /* ++ * Return the integer part (upper 32 bits). 
++ */ ++ return (uint32_t)(num >> 32); + } +diff --git a/tests/isc/random_test.c b/tests/isc/random_test.c +index 1935846..0016252 100644 +--- a/tests/isc/random_test.c ++++ b/tests/isc/random_test.c +@@ -321,7 +321,9 @@ random_test(pvalue_func_t *func, isc_random_func test_func) { + } + break; + case ISC_RANDOM_BYTES: +- isc_random_buf(values, sizeof(values)); ++ for (i = 0; i < ARRAY_SIZE(values); i++) { ++ values[i] = isc_random32(); ++ } + break; + case ISC_RANDOM_UNIFORM: + uniform_values = (uint16_t *)values; +-- +2.33.0 + diff --git a/backport-CVE-2025-8677.patch b/backport-CVE-2025-8677.patch new file mode 100644 index 0000000000000000000000000000000000000000..751b3a2f76388e3c89b1c7ab6c2c607624b54e9c --- /dev/null +++ b/backport-CVE-2025-8677.patch @@ -0,0 +1,85 @@ +From 7c5b8ef055900224f0424c341927562c5a9ebe19 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= +Date: Tue, 22 Jul 2025 08:07:02 +0200 +Subject: [PATCH] Fail the DNSSEC validation if matching but invalid DNSKEY is + found + +If a matching but cryptographically invalid key was encountered during +the DNSSEC validation, the key would be just skipped and not counted +towards validation failures. Treat such DNSSEC keys as hard failures +and fail the DNSSEC validation immediatelly instead of continuing the +DNSSEC validation with the next DNSKEYs in the RRset. 
+ +Co-authored-by: Matthijs Mekking + +(cherry picked from commit f00117a4226be90d1bc865aff19bddf114242914) +--- + lib/dns/validator.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/lib/dns/validator.c b/lib/dns/validator.c +index 55138d2590..6c21d35e64 100644 +--- a/lib/dns/validator.c ++++ b/lib/dns/validator.c +@@ -435,6 +435,8 @@ fetch_callback_dnskey(isc_task_t *task, isc_event_t *event) { + result = select_signing_key(val, rdataset); + if (result == ISC_R_SUCCESS) { + val->keyset = &val->frdataset; ++ } else { ++ val->failed = true; + } + } + result = validate_answer(val, true); +@@ -1174,6 +1176,8 @@ select_signing_key(dns_validator_t *val, dns_rdataset_t *rdataset) { + goto done; + } + dst_key_free(&val->key); ++ } else { ++ break; + } + dns_rdata_reset(&rdata); + result = dns_rdataset_next(rdataset); +@@ -1291,13 +1295,15 @@ seek_dnskey(dns_validator_t *val) { + "keyset with trust %s", + dns_trust_totext(val->frdataset.trust)); + result = select_signing_key(val, val->keyset); +- if (result != ISC_R_SUCCESS) { ++ if (result == ISC_R_NOTFOUND) { + /* +- * Either the key we're looking for is not +- * in the rrset, or something bad happened. +- * Give up. ++ * The key we're looking for is not ++ * in the rrset + */ + result = DNS_R_CONTINUE; ++ } else if (result != ISC_R_SUCCESS) { ++ /* Something bad happened. Give up. */ ++ break; + } + } + break; +@@ -1417,7 +1423,7 @@ selfsigned_dnskey(dns_validator_t *val) { + result = dns_dnssec_keyfromrdata(name, &keyrdata, mctx, + &dstkey); + if (result != ISC_R_SUCCESS) { +- continue; ++ return result; + } + + /* +@@ -1688,10 +1694,7 @@ check_signer(dns_validator_t *val, dns_rdata_t *keyrdata, uint16_t keyid, + val->event->name, keyrdata, val->view->mctx, + &dstkey); + if (result != ISC_R_SUCCESS) { +- /* +- * This really shouldn't happen, but... 
+- */ +- continue; ++ return result; + } + } + result = verify(val, dstkey, &rdata, sig.keyid); +-- +2.33.0 + diff --git a/backport-optimize-the-slabheader-placement-for-certain-RRtypes.patch b/backport-optimize-the-slabheader-placement-for-certain-RRtypes.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c338127ad15fe24b796cfd8ef7c5477655213d9 --- /dev/null +++ b/backport-optimize-the-slabheader-placement-for-certain-RRtypes.patch @@ -0,0 +1,98 @@ +From 8ef414a7f38a04cfc11df44adaedaf3126fa3878 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= +Date: Mon, 29 Jan 2024 16:36:30 +0100 +Subject: [PATCH] Optimize the slabheader placement for certain RRTypes + +Mark the infrastructure RRTypes as "priority" types and place them at +the beginning of the rdataslab header data graph. The non-priority +types either go right after the priority types (if any). + +(cherry picked from commit 3ac482be7fd058d284e89873021339579fad0615) + +Conflict:NA +Reference:https://gitlab.isc.org/isc-projects/bind9/-/commit/8ef414a7f38a04cfc11df44adaedaf3126fa3878 + +--- + lib/dns/rbtdb.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 42 insertions(+), 2 deletions(-) + +diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c +index 7793be8..bc0f8d8 100644 +--- a/lib/dns/rbtdb.c ++++ b/lib/dns/rbtdb.c +@@ -906,6 +906,30 @@ set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) { + } + } + ++static bool ++prio_type(rbtdb_rdatatype_t type) { ++ switch (type) { ++ case dns_rdatatype_soa: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa): ++ case dns_rdatatype_a: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_a): ++ case dns_rdatatype_aaaa: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_aaaa): ++ case dns_rdatatype_nsec: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec): ++ case dns_rdatatype_nsec3: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, 
dns_rdatatype_nsec3): ++ case dns_rdatatype_ns: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns): ++ case dns_rdatatype_ds: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds): ++ case dns_rdatatype_cname: ++ case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname): ++ return (true); ++ } ++ return (false); ++} ++ + /*% + * These functions allow the heap code to rank the priority of each + * element. It returns true if v1 happens "sooner" than v2. +@@ -6167,6 +6191,7 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, + rbtdb_changed_t *changed = NULL; + rdatasetheader_t *topheader = NULL, *topheader_prev = NULL; + rdatasetheader_t *header = NULL, *sigheader = NULL; ++ rdatasetheader_t *prioheader = NULL; + unsigned char *merged = NULL; + isc_result_t result; + bool header_nx; +@@ -6313,6 +6338,9 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, + for (topheader = rbtnode->data; topheader != NULL; + topheader = topheader->next) + { ++ if (prio_type(topheader->type)) { ++ prioheader = topheader; ++ } + if (topheader->type == newheader->type || + topheader->type == negtype) + { +@@ -6679,9 +6707,21 @@ find_header: + /* + * No rdatasets of the given type exist at the node. 
+ */ +- newheader->next = rbtnode->data; + newheader->down = NULL; +- rbtnode->data = newheader; ++ ++ if (prio_type(newheader->type)) { ++ /* This is a priority type, prepend it */ ++ newheader->next = rbtnode->data; ++ rbtnode->data = newheader; ++ } else if (prioheader != NULL) { ++ /* Append after the priority headers */ ++ newheader->next = prioheader->next; ++ prioheader->next = newheader; ++ } else { ++ /* There were no priority headers */ ++ newheader->next = rbtnode->data; ++ rbtnode->data = newheader; ++ } + } + } + +-- +2.33.0 + diff --git a/bind-9.11.12.tar.gz.asc b/bind-9.11.12.tar.gz.asc new file mode 100644 index 0000000000000000000000000000000000000000..6d7992f056a0b4574f5488e3e0f5881a685ff975 --- /dev/null +++ b/bind-9.11.12.tar.gz.asc @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIzBAABAgAdFiEErj+seWcR7Fn8AHqkdLtrmky7PTgFAl2WMooACgkQdLtrmky7 +PThv2RAAnXNLYTzXtH6ls29tRm5Hc+D6UaeqcWDNQ4BpkRVhrFxtukalGCi9mmB6 +NPJzFyXmaOW654pypCIuEgqJNFUpDtLzLzT7SUF+mhm+5plsaRSBnh4mq87l5KSp +twODAPnfCJV+HBk5RmToLEstAbGQ7xEBTyQtZoFkY+V7zEFwENKiCvWsoSWOkYR3 +zXo3sKjc83HV9ShbW/mCtbZf5L0qlbrKOAzqJfAFMhNNJi8kMbmr/Zi2sIfN+Rhv +g8HQo89Epv6r51yAdeED8idIX4rKjjcEtHrZeDmLdCcdHgSEj2sIlH92Joce6vL0 +S59A0rItIXm6fW8sz6WNpcj4tVtWYbIYjXZ4SPFNkaUrHv8cUekq+5vbI+v07Gh3 +2bhtDsDyTY5I1/AsY/EFmwkCAjUS00jZryBnuJpLB3v5JtUog4ek32yLBzPrqRBo +1876j4nlXAia8mG0OgJNWZ0gHyUPe/TgfR8fQDLmHxHHlKrJNTEwY6bLW8jzFTX1 +zk510fI1K7J9tiQgf5wcBQ2h3EBlqzDNIJDovoATzLYIf0HKyVegh/vnQdtdEhUR +1DzJAt3bsBfAP1AFfWPD/ACu5Zdm7SxY1wE/pjkwttDU3sRZqOfuwNBGeolu3cVN +O9/h1zsyVeVS0ui2vu4+V4EvNitmXsVbG2doDq9L5yBiIKGO2Ew= +=GCy6 +-----END PGP SIGNATURE----- diff --git a/bind-9.14.7.tar.gz.asc b/bind-9.14.7.tar.gz.asc new file mode 100644 index 0000000000000000000000000000000000000000..1134ae1b301709f81bda531559d6a9a421923c8f --- /dev/null +++ b/bind-9.14.7.tar.gz.asc @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIzBAABAgAdFiEErj+seWcR7Fn8AHqkdLtrmky7PTgFAl2WMpEACgkQdLtrmky7 
+PTh/sg//QbNRAQvADQfwF1PPo+JxB+3WzQ9oJAWeHbOoiubwkUwO9xE+BEnTNd5o +oM1lSLqFxNykOTaoeJlqPftPod1cxo7lSzkwflugGyB/59wliCpqCg053YV4x9mO +QggvA/E50+0FI/Om/7v4GHGADu/JE83FovOueWAB0LgqfDSD6QFcNFF9sUJJ4P7r +FcEXSWj8QbrHMWBKncZUOpD2ECotvtrYmi0DTHl1XfigESDQpWtsnTFuabCCsvkh +ch9wQRplAes2Mf/aS5tl1y0QKKBFuEjtGiTdgrDl6o9GLnx6CueX5saZehu2EVkr +fq2vEYUC2lRQSjuxSMMJ3L0TGUcl7+ixlAIISS2K9L5Xx7MhBXt/EH5KiKPfsEet +3EH+DhxV5uXjDU7MgvREnxT+ssV23e0HWTz4tVVQ9LpvYmWPIgLcSOhHCc57yoQF +c46V0f69dMWbMAlQ93EZSG274ZvpIszpK8+3hGI3/TuDFFgiQJeJJBFVtYJMle69 +3mEEclfzO7fBiXZFec6nVx2309bL64bafN7zszPKXl4XgoefOfD0v0eWqQT4fxfm +dnGC0qMqSZs5F+d0fISV5JUUNYzt9PZjvnzqLLGOeTF6l3/n9G1mmNsXcxJ1OEIF +6qh1oO7JTPjt0MFhKac4QjNQi/Bnp25O3I/PRyWZCbiwXkyvyQU= +=ZT7s +-----END PGP SIGNATURE----- diff --git a/bind-9.16.37.tar.xz.asc b/bind-9.16.37.tar.xz.asc new file mode 100644 index 0000000000000000000000000000000000000000..8ce1fd0e0129212bce00eda8117d3ff4a46bb63d --- /dev/null +++ b/bind-9.16.37.tar.xz.asc @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIzBAABCgAdFiEE2ZzOr4eXRwFPA41jGC4jV5Ri76oFAmPFaWcACgkQGC4jV5Ri +76oXaw/+OCf5ZKRNIM4Gr2nbdDSPNhHblFmOdAAgwX8929uZaiciJDV3UvPXsIZw +76XsGcECe8Ri6gq7xU8bUsNC7O8eiT2n0mtDAFdJDt9LyQeJVi/UwidzYXRjSukf +nQJYz2kBR/eO07JozqLS1oOm7bM7NmDdbMPgTk7qyKUkd8lXeph74gUo8X8LtVJV +03xwdYOPFO8mJtebq8i9pullVeE5AMc3qZW/FnNkgHcLZATXWE1K4ie1uw6YynWv +j/P87WdUKER4kXWg4030+isKaj4jeIevBKCdDxS2ZZvlN1ioK2+XKvyGyIouB6bF +BW3z6ndtZ353r40ajdliEa2eZdMlI9kPUzaMgRWnynyG8u3wkLSZoWuZJiSXiSJI +ccuT9c/O0kQQdbyqtWaY7CwJnG2pSCzRjWBpifHKOnCBCzCYrS0PIO0gsV88JZeg +p37mZBrwUQ7FaxXs3BRiYi0JNLDdhpBnSM1MbD1Q0KuH8Ndb/gBHV9p3BlpbLAuj +RGEvgDWEY97VEzkTLO7G+2BIh3TZAYtyHgO7TSuxyEor8FeQsVBawFOS9ZHTU+U6 +38ghgFBV5Svqs2R9Ri/xcPEaaKjqgDb+6KVaAIpvnHwW28yiHZj8Fwzj0T2ojC/r +53uz/dHpW9gXerWSH7gDsd4kjAJKZEC7x84CNwMpZ4kn6CkhQVE= +=DAsV +-----END PGP SIGNATURE----- diff --git a/bind-9.20.16.tar.xz b/bind-9.18.21.tar.xz similarity index 42% rename from bind-9.20.16.tar.xz rename to bind-9.18.21.tar.xz index 
92b76fce9cee6a6d5377d69ba6e373060f2f4a80..c2b86b24ce539ee5c1429ffe417f71daa7d6476e 100644 Binary files a/bind-9.20.16.tar.xz and b/bind-9.18.21.tar.xz differ diff --git a/bind-9.18.21.tar.xz.asc b/bind-9.18.21.tar.xz.asc new file mode 100644 index 0000000000000000000000000000000000000000..b4b062c8a31050b6d41b41acd0b8ca88a94e0745 --- /dev/null +++ b/bind-9.18.21.tar.xz.asc @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIzBAABCgAdFiEEcGtsKGIOdvkdEfffUQpkKgbFLOwFAmV3BGsACgkQUQpkKgbF +LOwu9w/+JciqKqT0JieUDwPzEhhulBCWEhbZFrHK6dFM5UkPHkaV79QkZAQEhnq1 +FXVEF99ZuTbz5s79wNAZ9I4AiU0al5RK1P5MwMBbjsQrfnkhmKnPIU1jx3FSVrCP +tC9l1xEjkLNi2vf28ZQ9KED2hUdqsgTZqDvgewEnrq1NtZ0K7ozz9nHQLfooDSJT +L5U9HDp3vf5BJWONjnKAPjJJdeRf7HPqokJVSjQcVxrT06VsMNUFFmyCbEJ0UTJm +mqDrRuEXhkAKf40DwMr0qGqiq5Q4m960yADEK1Aju/9cEf6Ag4FYyy70iyICe7Tj +T8qjVzzwboUJao3m/152+6qvzGXJKdUUZqCnNcCc2wmirmg/ES4DLLFyYYXBflj7 +hWCOLXeghF/785te4fmiH3gqcEZBEVcc0wl1HCL5m3q9kGutGgLJVOZgM5D6zf2T +0Sa60qIr5r+cKCS9OYowTH1+NqEsW4XhCVIe/RYEuXa3FFczIUbdGlUQ5t9ILBxi +zbZ04Tj0tecqUVkhoEYZfQzhHEa43LzxATdQ4Zc01USaxhbSFSoyG1+WP1tPD+PL +wqZA9tEuvKtngr/UP+BeLG0lWv5zbtShzM1V1cEg7JuoiI2onWstaN7NYXShiUMZ +oVYXIBbmNbXVmm2TYzt4mw9TotGWHkSNjPZGvvAYw/0mtcw6NXs= +=bzR1 +-----END PGP SIGNATURE----- diff --git a/bind-9.20.16.tar.xz.asc b/bind-9.20.16.tar.xz.asc deleted file mode 100644 index e805108ce152cd867a3a1e411a5088c0d68af78f..0000000000000000000000000000000000000000 --- a/bind-9.20.16.tar.xz.asc +++ /dev/null @@ -1,16 +0,0 @@ ------BEGIN PGP SIGNATURE----- - -iQIzBAABCgAdFiEEcGtsKGIOdvkdEfffUQpkKgbFLOwFAmkR618ACgkQUQpkKgbF -LOxqURAAqHv/YJolhC28m5CHM3TQlAKo5dsX7bs1yzhGK63XlT89+L+pGmEYN4Ja -McQ8OCOvvJI/vDireQ/E4wxiyKmOJoqkccB6QR5IYQwUIod/O3SEMktcNuG5YOOY -xYlt/5BwWmGN5R8jNUdjeRr+feFiZLc5rWFw+RneWaWg0Zq0qaiduKG+5Rgbfjat -nXOFLFlvFHTqKNtC3LTl7hLzyRyZSQjggiLJ/VhB1H8QUoqAzl0S0lRLzn0rqDqB -0e4WMlFwhNPwM5BBOZ/4Vw1ZvlYRrm5lXP/wYsWUliiVrc3wtRc5NNf6KfXLDbl2 -VDhjB6o4lAmh5jrFo8FWXa+WhG4TC0qViI3vI8fsqDV7g+CuYaXmHFqiPJeb85Uc 
-34v9e7NjDnhWV15hTfz1txKxsX/eQBYPgrbBBjJ5n9nJW2Db7oIOTAZESqsj1ky+ -VKuW7Tf+WAsfO4/PbfcskHlzHOMfImUAmdUTRud/6dL3R7bmW1zmHh0riAGjMQ9d -7LICqcB/JUlSa4+m+WWniAka7R5iOqJYBQmqPH+4XQ82SCLlN9V130RGT9lo/PZr -Fx5WiiaRBul9CG0w3TI52EVaWfF2DzgqdW91Vtgec5RRAS93lcbJBhnhET1rcFoE -MTRuGriw5Aq5zIhEhPd4U9t7OOSESICruTVfpYqE5KxKDqgyF04= -=/9tV ------END PGP SIGNATURE----- diff --git a/bind.spec b/bind.spec index 9a653cbefd19b333ed55ee938e0e93f2e18abc90..3e3e4a7c6cabc608961e096d32d0ae2163ade119 100644 --- a/bind.spec +++ b/bind.spec @@ -28,10 +28,10 @@ Summary: The Berkeley Internet Name Domain (BIND) DNS (Domain Name System) server Name: bind License: MPLv2.0 -Version: 9.20.16 -Release: 1 +Version: 9.18.21 +Release: 6 Epoch: 32 -Url: https://www.isc.org/bind/ +Url: https://www.isc.org/downloads/bind/ # Source0: https://downloads.isc.org/isc/bind9/%{version}/bind-%{version}.tar.xz Source1: named.sysconfig @@ -59,6 +59,23 @@ Source44: named-chroot-setup.service Source46: named-setup-rndc.service Source48: setup-named-softhsm.sh Source49: named-chroot.files + +Patch6000:backport-CVE-2023-4408.patch +Patch6001:backport-CVE-2023-5517.patch +Patch6002:backport-CVE-2023-5679.patch +Patch6003:backport-CVE-2023-50387-CVE-2023-50868.patch +Patch6004:backport-CVE-2024-0760.patch +Patch6005:backport-optimize-the-slabheader-placement-for-certain-RRtypes.patch +Patch6006:backport-CVE-2024-1737.patch +Patch6007:backport-CVE-2024-1975.patch +Patch6008:backport-CVE-2024-4076.patch +Patch6009:backport-CVE-2024-11187.patch +Patch6010:backport-CVE-2024-12705.patch +Patch6011:backport-CVE-2025-8677.patch +Patch6012:backport-CVE-2025-40778-01.patch +Patch6013:backport-CVE-2025-40778-02.patch +Patch6014:backport-CVE-2025-40778-03.patch +Patch6015:backport-CVE-2025-40780.patch # Common patches %{?systemd_ordering} @@ -87,7 +104,6 @@ BuildRequires: systemd BuildRequires: libnsl2 BuildRequires: libnghttp2-devel BuildRequires: chrpath -BuildRequires: userspace-rcu-devel %if %{with DLZ} BuildRequires: 
openldap-devel, libpq-devel, sqlite-devel, mariadb-connector-c-devel @@ -727,7 +743,7 @@ fi; %{_mandir}/man8/rndc-confgen.8* %{_mandir}/man1/named-journalprint.1* %{_mandir}/man8/filter-*.8.gz -%doc README.md named.conf.default +%doc CHANGES README.md named.conf.default %doc sample/ # Hide configuration @@ -756,9 +772,11 @@ fi; %dir /run/named %files libs +%{_libdir}/libbind9-%{version}*.so %{_libdir}/libisccc-%{version}*.so %{_libdir}/libns-%{version}*.so %{_libdir}/libdns-%{version}*.so +%{_libdir}/libirs-%{version}*.so %{_libdir}/libisc-%{version}*.so %{_libdir}/libisccfg-%{version}*.so @@ -809,12 +827,15 @@ fi; %files devel +%{_libdir}/libbind9.so %{_libdir}/libisccc.so %{_libdir}/libns.so %{_libdir}/libdns.so +%{_libdir}/libirs.so %{_libdir}/libisc.so %{_libdir}/libisccfg.so %dir %{_includedir}/bind9 +%{_includedir}/bind9/bind9 %{_includedir}/bind9/isccc %{_includedir}/bind9/ns %{_includedir}/bind9/dns @@ -894,11 +915,11 @@ fi; %endif %changelog -* Wed Dec 10 2025 tangce - 32:9.20.16-1 +* Fri Dec 12 2025 tangce - 32:9.18.21-6 - Type:requirement - CVE:NA - SUG:NA -- DESC:update to 9.20.16 +- DESC:Revert Update due to bind-dyndb-ldap build failure * Mon Nov 03 2025 tangce - 32:9.18.21-5 - Type:CVE