From 351890d682241e4eb9b8a63cddfb22e90567b870 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 15 Mar 2024 17:49:23 -0700 Subject: [PATCH 1/7] rustdoc: clean up formatting --- src/librustdoc/html/static/js/search.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 875ebe2fc90d4..cbfa503f26060 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -426,7 +426,7 @@ function initSearch(rawSearchIndex) { return c === "," || c === "="; } -/** + /** * Returns `true` if the given `c` character is a path separator. For example * `:` in `a::b` or a whitespace in `a b`. * From 5b44bfda7fc62b2874400e613672aefe5b49aaaa Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 16 Mar 2024 17:50:44 -0700 Subject: [PATCH 2/7] rustdoc-search: shard the search result descriptions The descriptions are, on almost all crates[^1], the majority of the size of the search index, even though they aren't really used for searching. This makes it relatively easy to separate them into their own files. This commit also bumps us to ES8. Out of the browsers we support, all of them support async functions according to caniuse. https://caniuse.com/async-functions [^1]: <https://microsoft.github.io/windows-docs-rs/>, a crate with 44MiB of pure names and no descriptions for them, is an outlier and should not be counted. 
--- .../docker/host-x86_64/mingw-check/Dockerfile | 2 +- src/librustdoc/html/render/mod.rs | 33 +- src/librustdoc/html/render/search_index.rs | 101 ++++- src/librustdoc/html/render/write_shared.rs | 31 +- src/librustdoc/html/static/.eslintrc.js | 2 +- src/librustdoc/html/static/js/main.js | 28 +- src/librustdoc/html/static/js/search.js | 348 ++++++++++-------- src/librustdoc/html/static/js/storage.js | 4 +- src/tools/rustdoc-js/.eslintrc.js | 2 +- src/tools/rustdoc-js/tester.js | 104 ++++-- tests/rustdoc/search-index-summaries.rs | 2 +- 11 files changed, 428 insertions(+), 229 deletions(-) diff --git a/src/ci/docker/host-x86_64/mingw-check/Dockerfile b/src/ci/docker/host-x86_64/mingw-check/Dockerfile index 30d3a52d82b82..de8db8ee03496 100644 --- a/src/ci/docker/host-x86_64/mingw-check/Dockerfile +++ b/src/ci/docker/host-x86_64/mingw-check/Dockerfile @@ -56,7 +56,7 @@ ENV SCRIPT python3 ../x.py --stage 2 test src/tools/expand-yaml-anchors && \ /scripts/validate-error-codes.sh && \ reuse --include-submodules lint && \ # Runs checks to ensure that there are no ES5 issues in our JS code. 
- es-check es6 ../src/librustdoc/html/static/js/*.js && \ + es-check es8 ../src/librustdoc/html/static/js/*.js && \ eslint -c ../src/librustdoc/html/static/.eslintrc.js ../src/librustdoc/html/static/js/*.js && \ eslint -c ../src/tools/rustdoc-js/.eslintrc.js ../src/tools/rustdoc-js/tester.js && \ eslint -c ../src/tools/rustdoc-gui/.eslintrc.js ../src/tools/rustdoc-gui/tester.js diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 6c5040414bced..c1a7593c26f08 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -184,40 +184,15 @@ pub(crate) enum RenderTypeId { impl RenderTypeId { pub fn write_to_string(&self, string: &mut String) { - // (sign, value) - let (sign, id): (bool, u32) = match &self { + let id: i32 = match &self { // 0 is a sentinel, everything else is one-indexed // concrete type - RenderTypeId::Index(idx) if *idx >= 0 => (false, (idx + 1isize).try_into().unwrap()), + RenderTypeId::Index(idx) if *idx >= 0 => (idx + 1isize).try_into().unwrap(), // generic type parameter - RenderTypeId::Index(idx) => (true, (-*idx).try_into().unwrap()), + RenderTypeId::Index(idx) => (*idx).try_into().unwrap(), _ => panic!("must convert render types to indexes before serializing"), }; - // zig-zag encoding - let value: u32 = (id << 1) | (if sign { 1 } else { 0 }); - // Self-terminating hex use capital letters for everything but the - // least significant digit, which is lowercase. For example, decimal 17 - // would be `` Aa `` if zig-zag encoding weren't used. - // - // Zig-zag encoding, however, stores the sign bit as the last bit. - // This means, in the last hexit, 1 is actually `c`, -1 is `b` - // (`a` is the imaginary -0), and, because all the bits are shifted - // by one, `` A` `` is actually 8 and `` Aa `` is -8. - // - // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html - // describes the encoding in more detail. 
- let mut shift: u32 = 28; - let mut mask: u32 = 0xF0_00_00_00; - while shift < 32 { - let hexit = (value & mask) >> shift; - if hexit != 0 || shift == 0 { - let hex = - char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap(); - string.push(hex); - } - shift = shift.wrapping_sub(4); - mask = mask >> 4; - } + search_index::write_vlqhex_to_string(id, string); } } diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index f153a90832910..34a4a89aa7bfa 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -17,12 +17,25 @@ use crate::html::format::join_with_double_colon; use crate::html::markdown::short_markdown_summary; use crate::html::render::{self, IndexItem, IndexItemFunctionType, RenderType, RenderTypeId}; +/// The serialized search description sharded version +/// +/// The `index` is a JSON-encoded list of names and other information. +/// +/// The desc has newlined descriptions, split up by size into 1MiB shards. +/// For example, `(4, "foo\nbar\nbaz\nquux")`. +pub(crate) struct SerializedSearchIndex { + pub(crate) index: String, + pub(crate) desc: Vec<(usize, String)>, +} + +const DESC_INDEX_SHARD_LEN: usize = 1024 * 1024; + /// Builds the search index from the collected metadata pub(crate) fn build_index<'tcx>( krate: &clean::Crate, cache: &mut Cache, tcx: TyCtxt<'tcx>, -) -> String { +) -> SerializedSearchIndex { let mut itemid_to_pathid = FxHashMap::default(); let mut primitives = FxHashMap::default(); let mut associated_types = FxHashMap::default(); @@ -318,7 +331,6 @@ pub(crate) fn build_index<'tcx>( .collect::>(); struct CrateData<'a> { - doc: String, items: Vec<&'a IndexItem>, paths: Vec<(ItemType, Vec)>, // The String is alias name and the vec is the list of the elements with this alias. 
@@ -327,6 +339,9 @@ pub(crate) fn build_index<'tcx>( aliases: &'a BTreeMap>, // Used when a type has more than one impl with an associated item with the same name. associated_item_disambiguators: &'a Vec<(usize, String)>, + // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string + // for information on the format. + descindex: String, } struct Paths { @@ -408,7 +423,6 @@ pub(crate) fn build_index<'tcx>( let mut names = Vec::with_capacity(self.items.len()); let mut types = String::with_capacity(self.items.len()); let mut full_paths = Vec::with_capacity(self.items.len()); - let mut descriptions = Vec::with_capacity(self.items.len()); let mut parents = Vec::with_capacity(self.items.len()); let mut functions = String::with_capacity(self.items.len()); let mut deprecated = Vec::with_capacity(self.items.len()); @@ -431,7 +445,6 @@ pub(crate) fn build_index<'tcx>( parents.push(item.parent_idx.map(|x| x + 1).unwrap_or(0)); names.push(item.name.as_str()); - descriptions.push(&item.desc); if !item.path.is_empty() { full_paths.push((index, &item.path)); @@ -454,14 +467,12 @@ pub(crate) fn build_index<'tcx>( let has_aliases = !self.aliases.is_empty(); let mut crate_data = serializer.serialize_struct("CrateData", if has_aliases { 9 } else { 8 })?; - crate_data.serialize_field("doc", &self.doc)?; crate_data.serialize_field("t", &types)?; crate_data.serialize_field("n", &names)?; - // Serialize as an array of item indices and full paths crate_data.serialize_field("q", &full_paths)?; - crate_data.serialize_field("d", &descriptions)?; crate_data.serialize_field("i", &parents)?; crate_data.serialize_field("f", &functions)?; + crate_data.serialize_field("D", &self.descindex)?; crate_data.serialize_field("c", &deprecated)?; crate_data.serialize_field("p", &paths)?; crate_data.serialize_field("b", &self.associated_item_disambiguators)?; @@ -472,16 +483,46 @@ pub(crate) fn build_index<'tcx>( } } - // Collect the index into a string - format!( + let desc = 
{ + let mut result = Vec::new(); + let mut set = String::new(); + let mut len: usize = 0; + for desc in std::iter::once(&crate_doc).chain(crate_items.iter().map(|item| &item.desc)) { + if set.len() >= DESC_INDEX_SHARD_LEN { + result.push((len, std::mem::replace(&mut set, String::new()))); + len = 0; + } else if len != 0 { + set.push('\n'); + } + set.push_str(&desc); + len += 1; + } + result.push((len, std::mem::replace(&mut set, String::new()))); + result + }; + + let descindex = { + let mut descindex = String::with_capacity(desc.len() * 4); + for &(len, _) in desc.iter() { + write_vlqhex_to_string(len.try_into().unwrap(), &mut descindex); + } + descindex + }; + + assert_eq!(crate_items.len() + 1, desc.iter().map(|(len, _)| *len).sum::()); + + // The index, which is actually used to search, is JSON + // It uses `JSON.parse(..)` to actually load, since JSON + // parses faster than the full JavaScript syntax. + let index = format!( r#"["{}",{}]"#, krate.name(tcx), serde_json::to_string(&CrateData { - doc: crate_doc, items: crate_items, paths: crate_paths, aliases: &aliases, associated_item_disambiguators: &associated_item_disambiguators, + descindex, }) .expect("failed serde conversion") // All these `replace` calls are because we have to go through JS string for JSON content. @@ -489,7 +530,45 @@ pub(crate) fn build_index<'tcx>( .replace('\'', r"\'") // We need to escape double quotes for the JSON. .replace("\\\"", "\\\\\"") - ) + ); + SerializedSearchIndex { index, desc } +} + +pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) { + let (sign, magnitude): (bool, u32) = + if n >= 0 { (false, n.try_into().unwrap()) } else { (true, (-n).try_into().unwrap()) }; + // zig-zag encoding + let value: u32 = (magnitude << 1) | (if sign { 1 } else { 0 }); + // Self-terminating hex use capital letters for everything but the + // least significant digit, which is lowercase. For example, decimal 17 + // would be `` Aa `` if zig-zag encoding weren't used. 
+ // + // Zig-zag encoding, however, stores the sign bit as the last bit. + // This means, in the last hexit, 1 is actually `c`, -1 is `b` + // (`a` is the imaginary -0), and, because all the bits are shifted + // by one, `` A` `` is actually 8 and `` Aa `` is -8. + // + // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html + // describes the encoding in more detail. + let mut shift: u32 = 28; + let mut mask: u32 = 0xF0_00_00_00; + // first skip leading zeroes + while shift < 32 { + let hexit = (value & mask) >> shift; + if hexit != 0 || shift == 0 { + break; + } + shift = shift.wrapping_sub(4); + mask = mask >> 4; + } + // now write the rest + while shift < 32 { + let hexit = (value & mask) >> shift; + let hex = char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap(); + string.push(hex); + shift = shift.wrapping_sub(4); + mask = mask >> 4; + } } pub(crate) fn get_function_type_for_search<'tcx>( diff --git a/src/librustdoc/html/render/write_shared.rs b/src/librustdoc/html/render/write_shared.rs index fbd45b2b48ef9..c806bf1cc66f3 100644 --- a/src/librustdoc/html/render/write_shared.rs +++ b/src/librustdoc/html/render/write_shared.rs @@ -24,6 +24,7 @@ use crate::formats::cache::Cache; use crate::formats::item_type::ItemType; use crate::formats::Impl; use crate::html::format::Buffer; +use crate::html::render::search_index::SerializedSearchIndex; use crate::html::render::{AssocItemLink, ImplRenderingParameters}; use crate::html::{layout, static_files}; use crate::visit::DocVisitor; @@ -46,7 +47,7 @@ use crate::{try_err, try_none}; pub(super) fn write_shared( cx: &mut Context<'_>, krate: &Crate, - search_index: String, + search_index: SerializedSearchIndex, options: &RenderOptions, ) -> Result<(), Error> { // Write out the shared files. 
Note that these are shared among all rustdoc @@ -312,7 +313,7 @@ pub(super) fn write_shared( let dst = cx.dst.join(&format!("search-index{}.js", cx.shared.resource_suffix)); let (mut all_indexes, mut krates) = try_err!(collect_json(&dst, krate.name(cx.tcx()).as_str()), &dst); - all_indexes.push(search_index); + all_indexes.push(search_index.index); krates.push(krate.name(cx.tcx()).to_string()); krates.sort(); @@ -335,6 +336,32 @@ else if (window.initSearch) window.initSearch(searchIndex); Ok(v.into_bytes()) })?; + let search_desc_dir = cx.dst.join(format!("search.desc/{krate}", krate = krate.name(cx.tcx()))); + if Path::new(&search_desc_dir).exists() { + try_err!(std::fs::remove_dir_all(&search_desc_dir), &search_desc_dir); + } + try_err!(std::fs::create_dir_all(&search_desc_dir), &search_desc_dir); + let kratename = krate.name(cx.tcx()).to_string(); + for (i, (_, data)) in search_index.desc.into_iter().enumerate() { + let output_filename = static_files::suffix_path( + &format!("{kratename}-desc-{i}-.js"), + &cx.shared.resource_suffix, + ); + let path = search_desc_dir.join(output_filename); + try_err!( + std::fs::write( + &path, + &format!( + r##"searchState.loadedDescShard({kratename}, {i}, {data})"##, + kratename = serde_json::to_string(&kratename).unwrap(), + data = serde_json::to_string(&data).unwrap(), + ) + .into_bytes() + ), + &path + ); + } + write_invocation_specific("crates.js", &|| { let krates = krates.iter().map(|k| format!("\"{k}\"")).join(","); Ok(format!("window.ALL_CRATES = [{krates}];").into_bytes()) diff --git a/src/librustdoc/html/static/.eslintrc.js b/src/librustdoc/html/static/.eslintrc.js index 1a34530c2d16e..a1e9cc6dfa142 100644 --- a/src/librustdoc/html/static/.eslintrc.js +++ b/src/librustdoc/html/static/.eslintrc.js @@ -5,7 +5,7 @@ module.exports = { }, "extends": "eslint:recommended", "parserOptions": { - "ecmaVersion": 2015, + "ecmaVersion": 8, "sourceType": "module" }, "rules": { diff --git a/src/librustdoc/html/static/js/main.js 
b/src/librustdoc/html/static/js/main.js index b9a769a7c6da4..2c1330614b8d8 100644 --- a/src/librustdoc/html/static/js/main.js +++ b/src/librustdoc/html/static/js/main.js @@ -329,6 +329,26 @@ function preLoadCss(cssUrl) { search.innerHTML = "

" + searchState.loadingText + "

"; searchState.showResults(search); }, + descShards: new Map(), + loadDesc: async function({descShard, descIndex}) { + if (descShard.promise === null) { + descShard.promise = new Promise((resolve, reject) => { + descShard.resolve = resolve; + const ds = descShard; + const fname = `${ds.crate}-desc-${ds.shard}-`; + const url = resourcePath( + `search.desc/${descShard.crate}/${fname}`, + ".js", + ); + loadScript(url, reject); + }); + } + const list = await descShard.promise; + return list[descIndex]; + }, + loadedDescShard: function (crate, shard, data) { + this.descShards.get(crate)[shard].resolve(data.split("\n")); + }, }; const toggleAllDocsId = "toggle-all-docs"; @@ -381,7 +401,7 @@ function preLoadCss(cssUrl) { window.location.replace("#" + item.id); }, 0); } - } + }, ); } } @@ -585,7 +605,7 @@ function preLoadCss(cssUrl) { const script = document .querySelector("script[data-ignore-extern-crates]"); const ignoreExternCrates = new Set( - (script ? script.getAttribute("data-ignore-extern-crates") : "").split(",") + (script ? 
script.getAttribute("data-ignore-extern-crates") : "").split(","), ); for (const lib of libs) { if (lib === window.currentCrate || ignoreExternCrates.has(lib)) { @@ -1098,7 +1118,7 @@ function preLoadCss(cssUrl) { } else { wrapper.style.setProperty( "--popover-arrow-offset", - (wrapperPos.right - pos.right + 4) + "px" + (wrapperPos.right - pos.right + 4) + "px", ); } wrapper.style.visibility = ""; @@ -1680,7 +1700,7 @@ href="https://doc.rust-lang.org/${channel}/rustdoc/read-documentation/search.htm pendingSidebarResizingFrame = false; document.documentElement.style.setProperty( "--resizing-sidebar-width", - desiredSidebarSize + "px" + desiredSidebarSize + "px", ); }, 100); } diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index cbfa503f26060..2732d6b15436d 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -206,14 +206,14 @@ const editDistanceState = { // insertion this.current[j - 1] + 1, // substitution - this.prev[j - 1] + substitutionCost + this.prev[j - 1] + substitutionCost, ); if ((i > 1) && (j > 1) && (a[aIdx] === b[bIdx - 1]) && (a[aIdx - 1] === b[bIdx])) { // transposition this.current[j] = Math.min( this.current[j], - this.prevPrev[j - 2] + 1 + this.prevPrev[j - 2] + 1, ); } } @@ -856,8 +856,8 @@ function initSearch(rawSearchIndex) { parserState, parserState.userQuery.slice(start, end), generics, - isInGenerics - ) + isInGenerics, + ), ); } } @@ -1295,7 +1295,7 @@ function initSearch(rawSearchIndex) { * * @return {ResultsTable} */ - function execQuery(parsedQuery, filterCrates, currentCrate) { + async function execQuery(parsedQuery, filterCrates, currentCrate) { const results_others = new Map(), results_in_args = new Map(), results_returned = new Map(); @@ -1326,6 +1326,7 @@ function initSearch(rawSearchIndex) { duplicates.add(obj.fullPath); obj.href = res[1]; + obj.desc = result.desc; out.push(obj); if (out.length >= MAX_RESULTS) { break; @@ -1342,9 +1343,9 
@@ function initSearch(rawSearchIndex) { * @param {Results} results * @param {boolean} isType * @param {string} preferredCrate - * @returns {[ResultObject]} + * @returns {Promise<[ResultObject]>} */ - function sortResults(results, isType, preferredCrate) { + async function sortResults(results, isType, preferredCrate) { const userQuery = parsedQuery.userQuery; const result_list = []; for (const result of results.values()) { @@ -1352,6 +1353,12 @@ function initSearch(rawSearchIndex) { result.word = searchIndex[result.id].word; result_list.push(result); } + for (const result of result_list) { + result.desc = searchState.loadDesc(result.item); + } + for (const result of result_list) { + result.desc = await result.desc; + } result_list.sort((aaa, bbb) => { let a, b; @@ -1422,8 +1429,8 @@ function initSearch(rawSearchIndex) { } // sort by description (no description goes later) - a = (aaa.item.desc === ""); - b = (bbb.item.desc === ""); + a = (aaa.desc === ""); + b = (bbb.desc === ""); if (a !== b) { return a - b; } @@ -1477,7 +1484,7 @@ function initSearch(rawSearchIndex) { whereClause, mgensIn, solutionCb, - unboxingDepth + unboxingDepth, ) { if (unboxingDepth >= UNBOXING_LIMIT) { return false; @@ -1524,7 +1531,7 @@ function initSearch(rawSearchIndex) { queryElem, whereClause, mgens, - unboxingDepth + 1 + unboxingDepth + 1, )) { continue; } @@ -1541,7 +1548,7 @@ function initSearch(rawSearchIndex) { whereClause, mgensScratch, solutionCb, - unboxingDepth + 1 + unboxingDepth + 1, )) { return true; } @@ -1551,7 +1558,7 @@ function initSearch(rawSearchIndex) { whereClause, mgens ? 
new Map(mgens) : null, solutionCb, - unboxingDepth + 1 + unboxingDepth + 1, )) { return true; } @@ -1625,7 +1632,7 @@ function initSearch(rawSearchIndex) { queryElem, whereClause, mgensScratch, - unboxingDepth + unboxingDepth, ); if (!solution) { return false; @@ -1638,7 +1645,7 @@ function initSearch(rawSearchIndex) { whereClause, simplifiedMgens, solutionCb, - unboxingDepth + unboxingDepth, ); if (passesUnification) { return true; @@ -1646,7 +1653,7 @@ function initSearch(rawSearchIndex) { } return false; }, - unboxingDepth + unboxingDepth, ); if (passesUnification) { return true; @@ -1663,7 +1670,7 @@ function initSearch(rawSearchIndex) { queryElem, whereClause, mgens, - unboxingDepth + 1 + unboxingDepth + 1, )) { continue; } @@ -1689,7 +1696,7 @@ function initSearch(rawSearchIndex) { whereClause, mgensScratch, solutionCb, - unboxingDepth + 1 + unboxingDepth + 1, ); if (passesUnification) { return true; @@ -1820,7 +1827,7 @@ function initSearch(rawSearchIndex) { queryElem, whereClause, mgensIn, - unboxingDepth + unboxingDepth, ) { if (fnType.bindings.size < queryElem.bindings.size) { return false; @@ -1849,7 +1856,7 @@ function initSearch(rawSearchIndex) { // possible solutions return false; }, - unboxingDepth + unboxingDepth, ); return newSolutions; }); @@ -1887,7 +1894,7 @@ function initSearch(rawSearchIndex) { queryElem, whereClause, mgens, - unboxingDepth + unboxingDepth, ) { if (unboxingDepth >= UNBOXING_LIMIT) { return false; @@ -1914,7 +1921,7 @@ function initSearch(rawSearchIndex) { queryElem, whereClause, mgensTmp, - unboxingDepth + unboxingDepth, ); } else if (fnType.generics.length > 0 || fnType.bindings.size > 0) { const simplifiedGenerics = [ @@ -1926,7 +1933,7 @@ function initSearch(rawSearchIndex) { queryElem, whereClause, mgens, - unboxingDepth + unboxingDepth, ); } return false; @@ -1975,7 +1982,7 @@ function initSearch(rawSearchIndex) { elem, whereClause, mgens, - unboxingDepth + 1 + unboxingDepth + 1, ); } if (row.id > 0 && elem.id > 0 && 
elem.pathWithoutLast.length === 0 && @@ -1989,7 +1996,7 @@ function initSearch(rawSearchIndex) { elem, whereClause, mgens, - unboxingDepth + unboxingDepth, ); } } @@ -2007,7 +2014,7 @@ function initSearch(rawSearchIndex) { return 0; } const maxPathEditDistance = Math.floor( - contains.reduce((acc, next) => acc + next.length, 0) / 3 + contains.reduce((acc, next) => acc + next.length, 0) / 3, ); let ret_dist = maxPathEditDistance + 1; const path = ty.path.split("::"); @@ -2066,7 +2073,8 @@ function initSearch(rawSearchIndex) { crate: item.crate, name: item.name, path: item.path, - desc: item.desc, + descShard: item.descShard, + descIndex: item.descIndex, ty: item.ty, parent: item.parent, type: item.type, @@ -2192,7 +2200,7 @@ function initSearch(rawSearchIndex) { results_others, results_in_args, results_returned, - maxEditDistance + maxEditDistance, ) { if (!row || (filterCrates !== null && row.crate !== filterCrates)) { return; @@ -2204,7 +2212,7 @@ function initSearch(rawSearchIndex) { // atoms in the function not present in the query const tfpDist = compareTypeFingerprints( fullId, - parsedQuery.typeFingerprint + parsedQuery.typeFingerprint, ); if (tfpDist !== null) { const in_args = row.type && row.type.inputs @@ -2276,7 +2284,7 @@ function initSearch(rawSearchIndex) { const tfpDist = compareTypeFingerprints( row.id, - parsedQuery.typeFingerprint + parsedQuery.typeFingerprint, ); if (tfpDist === null) { return; @@ -2298,10 +2306,10 @@ function initSearch(rawSearchIndex) { row.type.where_clause, mgens, null, - 0 // unboxing depth + 0, // unboxing depth ); }, - 0 // unboxing depth + 0, // unboxing depth )) { return; } @@ -2419,7 +2427,7 @@ function initSearch(rawSearchIndex) { } return [typeNameIdMap.get(name).id, constraints]; - }) + }), ); } @@ -2446,7 +2454,7 @@ function initSearch(rawSearchIndex) { results_others, results_in_args, results_returned, - maxEditDistance + maxEditDistance, ); } } @@ -2478,9 +2486,9 @@ function initSearch(rawSearchIndex) { } const 
ret = createQueryResults( - sortResults(results_in_args, true, currentCrate), - sortResults(results_returned, true, currentCrate), - sortResults(results_others, false, currentCrate), + await sortResults(results_in_args, true, currentCrate), + await sortResults(results_returned, true, currentCrate), + await sortResults(results_others, false, currentCrate), parsedQuery); handleAliases(ret, parsedQuery.original.replace(/"/g, ""), filterCrates, currentCrate); if (parsedQuery.error !== null && ret.others.length !== 0) { @@ -2581,14 +2589,14 @@ function initSearch(rawSearchIndex) { * @param {ParsedQuery} query * @param {boolean} display - True if this is the active tab */ - function addTab(array, query, display) { + async function addTab(array, query, display) { const extraClass = display ? " active" : ""; const output = document.createElement("div"); if (array.length > 0) { output.className = "search-results " + extraClass; - array.forEach(item => { + for (const item of array) { const name = item.name; const type = itemTypes[item.ty]; const longType = longItemTypes[item.ty]; @@ -2624,7 +2632,7 @@ ${item.displayPath}${name}\ link.appendChild(description); output.appendChild(link); - }); + } } else if (query.error === null) { output.className = "search-failed" + extraClass; output.innerHTML = "No results :(
" + @@ -2666,7 +2674,7 @@ ${item.displayPath}${name}\ * @param {boolean} go_to_first * @param {string} filterCrates */ - function showResults(results, go_to_first, filterCrates) { + async function showResults(results, go_to_first, filterCrates) { const search = searchState.outputElement(); if (go_to_first || (results.others.length === 1 && getSettingValue("go-to-only-result") === "true") @@ -2699,9 +2707,9 @@ ${item.displayPath}${name}\ currentResults = results.query.userQuery; - const ret_others = addTab(results.others, results.query, true); - const ret_in_args = addTab(results.in_args, results.query, false); - const ret_returned = addTab(results.returned, results.query, false); + const ret_others = await addTab(results.others, results.query, true); + const ret_in_args = await addTab(results.in_args, results.query, false); + const ret_returned = await addTab(results.returned, results.query, false); // Navigate to the relevant tab if the current tab is empty, like in case users search // for "-> String". If they had selected another tab previously, they have to click on @@ -2822,7 +2830,7 @@ ${item.displayPath}${name}\ * and display the results. * @param {boolean} [forced] */ - function search(forced) { + async function search(forced) { const query = parseQuery(searchState.input.value.trim()); let filterCrates = getFilterCrates(); @@ -2850,8 +2858,8 @@ ${item.displayPath}${name}\ // recent search query is added to the browser history. 
updateSearchHistory(buildUrl(query.original, filterCrates)); - showResults( - execQuery(query, filterCrates, window.currentCrate), + await showResults( + await execQuery(query, filterCrates, window.currentCrate), params.go_to_first, filterCrates); } @@ -2920,7 +2928,7 @@ ${item.displayPath}${name}\ pathIndex = type[PATH_INDEX_DATA]; generics = buildItemSearchTypeAll( type[GENERICS_DATA], - lowercasePaths + lowercasePaths, ); if (type.length > BINDINGS_DATA && type[BINDINGS_DATA].length > 0) { bindings = new Map(type[BINDINGS_DATA].map(binding => { @@ -3030,101 +3038,49 @@ ${item.displayPath}${name}\ * The raw function search type format is generated using serde in * librustdoc/html/render/mod.rs: IndexItemFunctionType::write_to_string * - * @param {{ - * string: string, - * offset: number, - * backrefQueue: FunctionSearchType[] - * }} itemFunctionDecoder * @param {Array<{name: string, ty: number}>} lowercasePaths - * @param {Map} * * @return {null|FunctionSearchType} */ - function buildFunctionSearchType(itemFunctionDecoder, lowercasePaths) { - const c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); - itemFunctionDecoder.offset += 1; - const [zero, ua, la, ob, cb] = ["0", "@", "`", "{", "}"].map(c => c.charCodeAt(0)); - // `` ` `` is used as a sentinel because it's fewer bytes than `null`, and decodes to zero - // `0` is a backref - if (c === la) { - return null; - } - // sixteen characters after "0" are backref - if (c >= zero && c < ua) { - return itemFunctionDecoder.backrefQueue[c - zero]; - } - if (c !== ob) { - throw ["Unexpected ", c, " in function: expected ", "{", "; this is a bug"]; - } - // call after consuming `{` - function decodeList() { - let c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); - const ret = []; - while (c !== cb) { - ret.push(decode()); - c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); - } - itemFunctionDecoder.offset += 1; // eat cb - return ret; - } - // consumes and 
returns a list or integer - function decode() { - let n = 0; - let c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); - if (c === ob) { - itemFunctionDecoder.offset += 1; - return decodeList(); - } - while (c < la) { - n = (n << 4) | (c & 0xF); - itemFunctionDecoder.offset += 1; - c = itemFunctionDecoder.string.charCodeAt(itemFunctionDecoder.offset); - } - // last character >= la - n = (n << 4) | (c & 0xF); - const [sign, value] = [n & 1, n >> 1]; - itemFunctionDecoder.offset += 1; - return sign ? -value : value; - } - const functionSearchType = decodeList(); - const INPUTS_DATA = 0; - const OUTPUT_DATA = 1; - let inputs, output; - if (typeof functionSearchType[INPUTS_DATA] === "number") { - inputs = [buildItemSearchType(functionSearchType[INPUTS_DATA], lowercasePaths)]; - } else { - inputs = buildItemSearchTypeAll( - functionSearchType[INPUTS_DATA], - lowercasePaths - ); - } - if (functionSearchType.length > 1) { - if (typeof functionSearchType[OUTPUT_DATA] === "number") { - output = [buildItemSearchType(functionSearchType[OUTPUT_DATA], lowercasePaths)]; + function buildFunctionSearchTypeCallback(lowercasePaths) { + return functionSearchType => { + if (functionSearchType === 0) { + return null; + } + const INPUTS_DATA = 0; + const OUTPUT_DATA = 1; + let inputs, output; + if (typeof functionSearchType[INPUTS_DATA] === "number") { + inputs = [buildItemSearchType(functionSearchType[INPUTS_DATA], lowercasePaths)]; } else { - output = buildItemSearchTypeAll( - functionSearchType[OUTPUT_DATA], - lowercasePaths + inputs = buildItemSearchTypeAll( + functionSearchType[INPUTS_DATA], + lowercasePaths, ); } - } else { - output = []; - } - const where_clause = []; - const l = functionSearchType.length; - for (let i = 2; i < l; ++i) { - where_clause.push(typeof functionSearchType[i] === "number" - ? 
[buildItemSearchType(functionSearchType[i], lowercasePaths)] - : buildItemSearchTypeAll(functionSearchType[i], lowercasePaths)); - } - const ret = { - inputs, output, where_clause, + if (functionSearchType.length > 1) { + if (typeof functionSearchType[OUTPUT_DATA] === "number") { + output = [buildItemSearchType(functionSearchType[OUTPUT_DATA], lowercasePaths)]; + } else { + output = buildItemSearchTypeAll( + functionSearchType[OUTPUT_DATA], + lowercasePaths, + ); + } + } else { + output = []; + } + const where_clause = []; + const l = functionSearchType.length; + for (let i = 2; i < l; ++i) { + where_clause.push(typeof functionSearchType[i] === "number" + ? [buildItemSearchType(functionSearchType[i], lowercasePaths)] + : buildItemSearchTypeAll(functionSearchType[i], lowercasePaths)); + } + return { + inputs, output, where_clause, + }; }; - itemFunctionDecoder.backrefQueue.unshift(ret); - if (itemFunctionDecoder.backrefQueue.length > 16) { - itemFunctionDecoder.backrefQueue.pop(); - } - return ret; } /** @@ -3245,6 +3201,68 @@ ${item.displayPath}${name}\ return functionTypeFingerprint[(fullId * 4) + 3]; } + class VlqHexDecoder { + constructor(string, cons) { + this.string = string; + this.cons = cons; + this.offset = 0; + this.backrefQueue = []; + } + // call after consuming `{` + decodeList() { + const cb = "}".charCodeAt(0); + let c = this.string.charCodeAt(this.offset); + const ret = []; + while (c !== cb) { + ret.push(this.decode()); + c = this.string.charCodeAt(this.offset); + } + this.offset += 1; // eat cb + return ret; + } + // consumes and returns a list or integer + decode() { + const [ob, la] = ["{", "`"].map(c => c.charCodeAt(0)); + let n = 0; + let c = this.string.charCodeAt(this.offset); + if (c === ob) { + this.offset += 1; + return this.decodeList(); + } + while (c < la) { + n = (n << 4) | (c & 0xF); + this.offset += 1; + c = this.string.charCodeAt(this.offset); + } + // last character >= la + n = (n << 4) | (c & 0xF); + const [sign, value] = [n & 1, 
n >> 1]; + this.offset += 1; + return sign ? -value : value; + } + next() { + const c = this.string.charCodeAt(this.offset); + const [zero, ua, la] = ["0", "@", "`"].map(c => c.charCodeAt(0)); + // sixteen characters after "0" are backref + if (c >= zero && c < ua) { + this.offset += 1; + return this.backrefQueue[c - zero]; + } + // special exception: 0 doesn't use backref encoding + // it's already one character, and it's always nullish + if (c === la) { + this.offset += 1; + return this.cons(0); + } + const result = this.cons(this.decode()); + this.backrefQueue.unshift(result); + if (this.backrefQueue.length > 16) { + this.backrefQueue.pop(); + } + return result; + } + } + /** * Convert raw search index into in-memory search index. * @@ -3271,18 +3289,32 @@ ${item.displayPath}${name}\ id = 0; for (const [crate, crateCorpus] of rawSearchIndex) { + // a string representing the lengths of each description shard + // (VLQ-hex encoded; each `next()` call yields the item count of one shard) + const itemDescShardDecoder = new VlqHexDecoder(crateCorpus.D, noop => noop); + let descShard = { + crate, + shard: 0, + start: 0, + len: itemDescShardDecoder.next(), + promise: null, + resolve: null, + }; + const descShardList = [ descShard ]; + // This object should have exactly the same set of fields as the "row" // object defined below. Your JavaScript runtime will thank you. // https://mathiasbynens.be/notes/shapes-ics const crateRow = { - crate: crate, + crate, ty: 3, // == ExternCrate name: crate, path: "", - desc: crateCorpus.doc, + descShard, + descIndex: 0, parent: undefined, type: null, - id: id, + id, word: crate, normalizedName: crate.indexOf("_") === -1 ? crate : crate.replace(/_/g, ""), deprecated: null, @@ -3302,16 +3334,8 @@ ${item.displayPath}${name}\ // i.e. 
if indices 4 and 11 are present, but 5-10 and 12-13 are not present, // 5-10 will fall back to the path for 4 and 12-13 will fall back to the path for 11 const itemPaths = new Map(crateCorpus.q); - // an array of (String) descriptions - const itemDescs = crateCorpus.d; // an array of (Number) the parent path index + 1 to `paths`, or 0 if none const itemParentIdxs = crateCorpus.i; - // a string representing the list of function types - const itemFunctionDecoder = { - string: crateCorpus.f, - offset: 0, - backrefQueue: [], - }; // an array of (Number) indices for the deprecated items const deprecatedItems = new Set(crateCorpus.c); // an array of (Number) indices for the deprecated items @@ -3326,6 +3350,12 @@ ${item.displayPath}${name}\ // an array of [{name: String, ty: Number}] const lowercasePaths = []; + // a string representing the list of function types + const itemFunctionDecoder = new VlqHexDecoder( + crateCorpus.f, + buildFunctionSearchTypeCallback(lowercasePaths), + ); + // convert `rawPaths` entries into object form // generate normalizedPaths for function search mode let len = paths.length; @@ -3353,13 +3383,26 @@ ${item.displayPath}${name}\ // faster analysis operations lastPath = ""; len = itemTypes.length; + let descIndex = 1; for (let i = 0; i < len; ++i) { + if (descIndex >= descShard.len) { + descShard = { + crate, + shard: descShard.shard + 1, + start: descShard.start + descShard.len, + len: itemDescShardDecoder.next(), + promise: null, + resolve: null, + }; + descIndex = 0; + descShardList.push(descShard); + } let word = ""; if (typeof itemNames[i] === "string") { word = itemNames[i].toLowerCase(); } const path = itemPaths.has(i) ? 
itemPaths.get(i) : lastPath; - const type = buildFunctionSearchType(itemFunctionDecoder, lowercasePaths); + const type = itemFunctionDecoder.next(); if (type !== null) { if (type) { const fp = functionTypeFingerprint.subarray(id * 4, (id + 1) * 4); @@ -3380,14 +3423,15 @@ ${item.displayPath}${name}\ // This object should have exactly the same set of fields as the "crateRow" // object defined above. const row = { - crate: crate, + crate, ty: itemTypes.charCodeAt(i) - charA, name: itemNames[i], - path: path, - desc: itemDescs[i], + path, + descShard, + descIndex, parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined, type, - id: id, + id, word, normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""), deprecated: deprecatedItems.has(i), @@ -3396,6 +3440,7 @@ ${item.displayPath}${name}\ id += 1; searchIndex.push(row); lastPath = row.path; + descIndex += 1; } if (aliases) { @@ -3419,6 +3464,7 @@ ${item.displayPath}${name}\ } } currentIndex += itemTypes.length; + searchState.descShards.set(crate, descShardList); } // Drop the (rather large) hash table used for reusing function items TYPES_POOL = new Map(); diff --git a/src/librustdoc/html/static/js/storage.js b/src/librustdoc/html/static/js/storage.js index bda7b3c647e7e..73c543567c078 100644 --- a/src/librustdoc/html/static/js/storage.js +++ b/src/librustdoc/html/static/js/storage.js @@ -211,14 +211,14 @@ function updateSidebarWidth() { if (desktopSidebarWidth && desktopSidebarWidth !== "null") { document.documentElement.style.setProperty( "--desktop-sidebar-width", - desktopSidebarWidth + "px" + desktopSidebarWidth + "px", ); } const srcSidebarWidth = getSettingValue("src-sidebar-width"); if (srcSidebarWidth && srcSidebarWidth !== "null") { document.documentElement.style.setProperty( "--src-sidebar-width", - srcSidebarWidth + "px" + srcSidebarWidth + "px", ); } } diff --git a/src/tools/rustdoc-js/.eslintrc.js b/src/tools/rustdoc-js/.eslintrc.js index 4ab3a31573309..b9d0e251c242b 
100644 --- a/src/tools/rustdoc-js/.eslintrc.js +++ b/src/tools/rustdoc-js/.eslintrc.js @@ -6,7 +6,7 @@ module.exports = { }, "extends": "eslint:recommended", "parserOptions": { - "ecmaVersion": 2015, + "ecmaVersion": 8, "sourceType": "module" }, "rules": { diff --git a/src/tools/rustdoc-js/tester.js b/src/tools/rustdoc-js/tester.js index 86881ef362e9f..ae4121cc03f51 100644 --- a/src/tools/rustdoc-js/tester.js +++ b/src/tools/rustdoc-js/tester.js @@ -1,3 +1,4 @@ +/* global globalThis */ const fs = require("fs"); const path = require("path"); @@ -133,7 +134,7 @@ function valueCheck(fullPath, expected, result, error_text, queryName) { expected_value, result.get(key), error_text, - queryName + queryName, ); } else { error_text.push(`${queryName}==> EXPECTED has extra key in map from field ` + @@ -212,11 +213,11 @@ function runParser(query, expected, parseQuery, queryName) { return error_text; } -function runSearch(query, expected, doSearch, loadedFile, queryName) { +async function runSearch(query, expected, doSearch, loadedFile, queryName) { const ignore_order = loadedFile.ignore_order; const exact_check = loadedFile.exact_check; - const results = doSearch(query, loadedFile.FILTER_CRATE); + const results = await doSearch(query, loadedFile.FILTER_CRATE); const error_text = []; for (const key in expected) { @@ -238,7 +239,7 @@ function runSearch(query, expected, doSearch, loadedFile, queryName) { } let prev_pos = -1; - entry.forEach((elem, index) => { + for (const [index, elem] of entry.entries()) { const entry_pos = lookForEntry(elem, results[key]); if (entry_pos === -1) { error_text.push(queryName + "==> Result not found in '" + key + "': '" + @@ -260,13 +261,13 @@ function runSearch(query, expected, doSearch, loadedFile, queryName) { } else { prev_pos = entry_pos; } - }); + } } return error_text; } -function runCorrections(query, corrections, getCorrections, loadedFile) { - const qc = getCorrections(query, loadedFile.FILTER_CRATE); +async function 
runCorrections(query, corrections, getCorrections, loadedFile) { + const qc = await getCorrections(query, loadedFile.FILTER_CRATE); const error_text = []; if (corrections === null) { @@ -299,18 +300,27 @@ function checkResult(error_text, loadedFile, displaySuccess) { return 1; } -function runCheckInner(callback, loadedFile, entry, getCorrections, extra) { +async function runCheckInner(callback, loadedFile, entry, getCorrections, extra) { if (typeof entry.query !== "string") { console.log("FAILED"); console.log("==> Missing `query` field"); return false; } - let error_text = callback(entry.query, entry, extra ? "[ query `" + entry.query + "`]" : ""); + let error_text = await callback( + entry.query, + entry, + extra ? "[ query `" + entry.query + "`]" : "", + ); if (checkResult(error_text, loadedFile, false) !== 0) { return false; } if (entry.correction !== undefined) { - error_text = runCorrections(entry.query, entry.correction, getCorrections, loadedFile); + error_text = await runCorrections( + entry.query, + entry.correction, + getCorrections, + loadedFile, + ); if (checkResult(error_text, loadedFile, false) !== 0) { return false; } @@ -318,16 +328,16 @@ function runCheckInner(callback, loadedFile, entry, getCorrections, extra) { return true; } -function runCheck(loadedFile, key, getCorrections, callback) { +async function runCheck(loadedFile, key, getCorrections, callback) { const expected = loadedFile[key]; if (Array.isArray(expected)) { for (const entry of expected) { - if (!runCheckInner(callback, loadedFile, entry, getCorrections, true)) { + if (!await runCheckInner(callback, loadedFile, entry, getCorrections, true)) { return 1; } } - } else if (!runCheckInner(callback, loadedFile, expected, getCorrections, false)) { + } else if (!await runCheckInner(callback, loadedFile, expected, getCorrections, false)) { return 1; } console.log("OK"); @@ -338,7 +348,7 @@ function hasCheck(content, checkName) { return content.startsWith(`const ${checkName}`) || 
content.includes(`\nconst ${checkName}`); } -function runChecks(testFile, doSearch, parseQuery, getCorrections) { +async function runChecks(testFile, doSearch, parseQuery, getCorrections) { let checkExpected = false; let checkParsed = false; let testFileContent = readFile(testFile); @@ -367,12 +377,12 @@ function runChecks(testFile, doSearch, parseQuery, getCorrections) { let res = 0; if (checkExpected) { - res += runCheck(loadedFile, "EXPECTED", getCorrections, (query, expected, text) => { + res += await runCheck(loadedFile, "EXPECTED", getCorrections, (query, expected, text) => { return runSearch(query, expected, doSearch, loadedFile, text); }); } if (checkParsed) { - res += runCheck(loadedFile, "PARSED", getCorrections, (query, expected, text) => { + res += await runCheck(loadedFile, "PARSED", getCorrections, (query, expected, text) => { return runParser(query, expected, parseQuery, text); }); } @@ -393,6 +403,35 @@ function loadSearchJS(doc_folder, resource_suffix) { const searchIndexJs = path.join(doc_folder, "search-index" + resource_suffix + ".js"); const searchIndex = require(searchIndexJs); + globalThis.searchState = { + descShards: new Map(), + loadDesc: async function({descShard, descIndex}) { + if (descShard.promise === null) { + descShard.promise = new Promise((resolve, reject) => { + descShard.resolve = resolve; + const ds = descShard; + const fname = `${ds.crate}-desc-${ds.shard}-${resource_suffix}.js`; + fs.readFile( + `${doc_folder}/search.desc/${descShard.crate}/${fname}`, + (err, data) => { + if (err) { + reject(err); + } else { + eval(data.toString("utf8")); + } + }, + ); + }); + } + const list = await descShard.promise; + return list[descIndex]; + }, + loadedDescShard: function (crate, shard, data) { + //console.log(this.descShards); + this.descShards.get(crate)[shard].resolve(data.split("\n")); + }, + }; + const staticFiles = path.join(doc_folder, "static.files"); const searchJs = fs.readdirSync(staticFiles).find(f => 
f.match(/search.*\.js$/)); const searchModule = require(path.join(staticFiles, searchJs)); @@ -474,7 +513,7 @@ function parseOptions(args) { return null; } -function main(argv) { +async function main(argv) { const opts = parseOptions(argv.slice(2)); if (opts === null) { return 1; @@ -482,7 +521,7 @@ function main(argv) { const parseAndSearch = loadSearchJS( opts["doc_folder"], - opts["resource_suffix"] + opts["resource_suffix"], ); let errors = 0; @@ -494,21 +533,34 @@ function main(argv) { }; if (opts["test_file"].length !== 0) { - opts["test_file"].forEach(file => { + for (const file of opts["test_file"]) { process.stdout.write(`Testing ${file} ... `); - errors += runChecks(file, doSearch, parseAndSearch.parseQuery, getCorrections); - }); + errors += await runChecks(file, doSearch, parseAndSearch.parseQuery, getCorrections); + } } else if (opts["test_folder"].length !== 0) { - fs.readdirSync(opts["test_folder"]).forEach(file => { + for (const file of fs.readdirSync(opts["test_folder"])) { if (!file.endsWith(".js")) { - return; + continue; } process.stdout.write(`Testing ${file} ... `); - errors += runChecks(path.join(opts["test_folder"], file), doSearch, + errors += await runChecks(path.join(opts["test_folder"], file), doSearch, parseAndSearch.parseQuery, getCorrections); - }); + } } return errors > 0 ? 
1 : 0; } -process.exit(main(process.argv)); +main(process.argv).catch(e => { + console.log(e); + process.exit(1); +}).then(x => process.exit(x)); + +process.on("beforeExit", () => { + console.log("process did not complete"); + process.exit(1); +}); + +/*process.on("uncaughtException", (err) => { + console.log(`Uncaught Exception: ${err.message}`); + process.exit(1); +});*/ diff --git a/tests/rustdoc/search-index-summaries.rs b/tests/rustdoc/search-index-summaries.rs index efd366405bfeb..529b42d0ca903 100644 --- a/tests/rustdoc/search-index-summaries.rs +++ b/tests/rustdoc/search-index-summaries.rs @@ -1,6 +1,6 @@ #![crate_name = "foo"] -// @hasraw 'search-index.js' 'Foo short link.' +// @hasraw 'search.desc/foo/foo-desc-0-.js' 'Foo short link.' // @!hasraw - 'www.example.com' // @!hasraw - 'More Foo.' From 2e368bfe9ec1ace30b43673d8b30a7f388b86e28 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 16 Mar 2024 23:01:47 -0700 Subject: [PATCH 3/7] Fix style errors --- src/librustdoc/html/static/js/main.js | 2 +- src/tools/rustdoc-js/tester.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/librustdoc/html/static/js/main.js b/src/librustdoc/html/static/js/main.js index 2c1330614b8d8..eff3dc27911bf 100644 --- a/src/librustdoc/html/static/js/main.js +++ b/src/librustdoc/html/static/js/main.js @@ -346,7 +346,7 @@ function preLoadCss(cssUrl) { const list = await descShard.promise; return list[descIndex]; }, - loadedDescShard: function (crate, shard, data) { + loadedDescShard: function(crate, shard, data) { this.descShards.get(crate)[shard].resolve(data.split("\n")); }, }; diff --git a/src/tools/rustdoc-js/tester.js b/src/tools/rustdoc-js/tester.js index ae4121cc03f51..1af2f44c230d6 100644 --- a/src/tools/rustdoc-js/tester.js +++ b/src/tools/rustdoc-js/tester.js @@ -426,7 +426,7 @@ function loadSearchJS(doc_folder, resource_suffix) { const list = await descShard.promise; return list[descIndex]; }, - loadedDescShard: function (crate, shard, 
data) { + loadedDescShard: function(crate, shard, data) { //console.log(this.descShards); this.descShards.get(crate)[shard].resolve(data.split("\n")); }, From e860b9cd24ba7555c0aba8e850985c30a837335e Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Mon, 18 Mar 2024 13:39:35 -0700 Subject: [PATCH 4/7] Use promise.all to load sorted results in parallel --- src/librustdoc/html/static/js/search.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 2732d6b15436d..15da5bf96b2ca 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -2485,10 +2485,15 @@ function initSearch(rawSearchIndex) { innerRunQuery(); } + const [sorted_in_args, sorted_returned, sorted_others] = await Promise.all([ + sortResults(results_in_args, true, currentCrate), + sortResults(results_returned, true, currentCrate), + sortResults(results_others, false, currentCrate), + ]); const ret = createQueryResults( - await sortResults(results_in_args, true, currentCrate), - await sortResults(results_returned, true, currentCrate), - await sortResults(results_others, false, currentCrate), + sorted_in_args, + sorted_returned, + sorted_others, parsedQuery); handleAliases(ret, parsedQuery.original.replace(/"/g, ""), filterCrates, currentCrate); if (parsedQuery.error !== null && ret.others.length !== 0) { From 28db4ccda76ffd2ce4c36912a194979a7ce2ef8d Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 21 Mar 2024 17:19:39 -0700 Subject: [PATCH 5/7] rustdoc-search: compressed bitmap to sort, then load desc This adds a bit more data than "pure sharding" by including information about which items have no description at all. This way, it can sort the results, then truncate, then finally download the description. 
With the "e" bitmap: 2380KiB Without the "e" bitmap: 2364KiB --- Cargo.lock | 2 + src/librustdoc/Cargo.toml | 2 + src/librustdoc/html/render/search_index.rs | 233 ++++++++++++++++++++- src/librustdoc/html/static/js/search.js | 190 ++++++++++++++--- src/tools/rustdoc-js/tester.js | 5 - 5 files changed, 395 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 16aed3dc49ca0..1e7ef0cca9bc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4741,6 +4741,8 @@ version = "0.0.0" dependencies = [ "arrayvec", "askama", + "base64", + "byteorder", "expect-test", "indexmap", "itertools 0.12.1", diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index bd0fbef998b2b..44ba2a8c0153e 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -9,6 +9,8 @@ path = "lib.rs" [dependencies] arrayvec = { version = "0.7", default-features = false } askama = { version = "0.12", default-features = false, features = ["config"] } +base64 = "0.21.7" +byteorder = "1.5" itertools = "0.12" indexmap = "2" minifier = "0.3.0" diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 34a4a89aa7bfa..2ec22df0b434f 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -1,6 +1,7 @@ use std::collections::hash_map::Entry; use std::collections::{BTreeMap, VecDeque}; +use base64::prelude::*; use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_middle::ty::TyCtxt; use rustc_span::def_id::DefId; @@ -21,14 +22,14 @@ use crate::html::render::{self, IndexItem, IndexItemFunctionType, RenderType, Re /// /// The `index` is a JSON-encoded list of names and other information. /// -/// The desc has newlined descriptions, split up by size into 1MiB shards. +/// The desc has newlined descriptions, split up by size into 128KiB shards. /// For example, `(4, "foo\nbar\nbaz\nquux")`. 
pub(crate) struct SerializedSearchIndex { pub(crate) index: String, pub(crate) desc: Vec<(usize, String)>, } -const DESC_INDEX_SHARD_LEN: usize = 1024 * 1024; +const DESC_INDEX_SHARD_LEN: usize = 128 * 1024; /// Builds the search index from the collected metadata pub(crate) fn build_index<'tcx>( @@ -342,6 +343,8 @@ pub(crate) fn build_index<'tcx>( // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string // for information on the format. descindex: String, + // A list of items with no description. This is eventually turned into a bitmap. + emptydesc: Vec, } struct Paths { @@ -456,7 +459,8 @@ pub(crate) fn build_index<'tcx>( } if item.deprecation.is_some() { - deprecated.push(index); + // bitmasks always use 1-indexing for items, with 0 as the crate itself + deprecated.push(u32::try_from(index + 1).unwrap()); } } @@ -473,9 +477,18 @@ pub(crate) fn build_index<'tcx>( crate_data.serialize_field("i", &parents)?; crate_data.serialize_field("f", &functions)?; crate_data.serialize_field("D", &self.descindex)?; - crate_data.serialize_field("c", &deprecated)?; crate_data.serialize_field("p", &paths)?; crate_data.serialize_field("b", &self.associated_item_disambiguators)?; + let mut buf = Vec::new(); + let mut strbuf = String::new(); + write_bitmap_to_bytes(&deprecated, &mut buf).unwrap(); + BASE64_STANDARD.encode_string(&buf, &mut strbuf); + crate_data.serialize_field("c", &strbuf)?; + strbuf.clear(); + buf.clear(); + write_bitmap_to_bytes(&self.emptydesc, &mut buf).unwrap(); + BASE64_STANDARD.encode_string(&buf, &mut strbuf); + crate_data.serialize_field("e", &strbuf)?; if has_aliases { crate_data.serialize_field("a", &self.aliases)?; } @@ -483,11 +496,18 @@ pub(crate) fn build_index<'tcx>( } } - let desc = { + let (emptydesc, desc) = { + let mut emptydesc = Vec::new(); let mut result = Vec::new(); let mut set = String::new(); let mut len: usize = 0; + let mut itemindex: u32 = 0; for desc in 
std::iter::once(&crate_doc).chain(crate_items.iter().map(|item| &item.desc)) { + if desc == "" { + emptydesc.push(itemindex); + itemindex += 1; + continue; + } if set.len() >= DESC_INDEX_SHARD_LEN { result.push((len, std::mem::replace(&mut set, String::new()))); len = 0; @@ -496,9 +516,10 @@ pub(crate) fn build_index<'tcx>( } set.push_str(&desc); len += 1; + itemindex += 1; } result.push((len, std::mem::replace(&mut set, String::new()))); - result + (emptydesc, result) }; let descindex = { @@ -509,7 +530,10 @@ pub(crate) fn build_index<'tcx>( descindex }; - assert_eq!(crate_items.len() + 1, desc.iter().map(|(len, _)| *len).sum::()); + assert_eq!( + crate_items.len() + 1, + desc.iter().map(|(len, _)| *len).sum::() + emptydesc.len() + ); // The index, which is actually used to search, is JSON // It uses `JSON.parse(..)` to actually load, since JSON @@ -523,6 +547,7 @@ pub(crate) fn build_index<'tcx>( aliases: &aliases, associated_item_disambiguators: &associated_item_disambiguators, descindex, + emptydesc, }) .expect("failed serde conversion") // All these `replace` calls are because we have to go through JS string for JSON content. 
@@ -571,6 +596,200 @@ pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) { } } +// checked against roaring-rs in +// https://gitlab.com/notriddle/roaring-test +pub fn write_bitmap_to_bytes(domain: &[u32], mut out: impl std::io::Write) -> std::io::Result<()> { + // https://arxiv.org/pdf/1603.06549.pdf + let mut keys = Vec::::new(); + let mut containers = Vec::::new(); + enum Container { + /// number of ones, bits + Bits(Box<[u64; 1024]>), + /// list of entries + Array(Vec), + /// list of (start, len-1) + Run(Vec<(u16, u16)>), + } + impl Container { + fn popcount(&self) -> u32 { + match self { + Container::Bits(bits) => bits.iter().copied().map(|x| x.count_ones()).sum(), + Container::Array(array) => { + array.len().try_into().expect("array can't be bigger than 2**32") + } + Container::Run(runs) => { + runs.iter().copied().map(|(_, lenm1)| u32::from(lenm1) + 1).sum() + } + } + } + fn push(&mut self, value: u16) { + match self { + Container::Bits(bits) => bits[value as usize >> 6] |= 1 << (value & 0x3F), + Container::Array(array) => { + array.push(value); + if array.len() >= 4096 { + let array = std::mem::replace(array, Vec::new()); + *self = Container::Bits(Box::new([0; 1024])); + for value in array { + self.push(value); + } + } + } + Container::Run(runs) => { + if let Some(r) = runs.last_mut() + && r.0 + r.1 + 1 == value + { + r.1 += 1; + } else { + runs.push((value, 0)); + } + } + } + } + fn try_make_run(&mut self) -> bool { + match self { + Container::Bits(bits) => { + let mut r: u64 = 0; + for (i, chunk) in bits.iter().copied().enumerate() { + let next_chunk = + i.checked_add(1).and_then(|i| bits.get(i)).copied().unwrap_or(0); + r += !chunk & u64::from((chunk << 1).count_ones()); + r += !next_chunk & u64::from((chunk >> 63).count_ones()); + } + if (2 + 4 * r) < 8192 { + let bits = std::mem::replace(bits, Box::new([0; 1024])); + *self = Container::Run(Vec::new()); + for (i, bits) in bits.iter().copied().enumerate() { + if bits == 0 { + continue; + } 
+ for j in 0..64 { + let value = (u16::try_from(i).unwrap() << 6) | j; + if bits & (1 << j) != 0 { + self.push(value); + } + } + } + true + } else { + false + } + } + Container::Array(array) if array.len() <= 5 => false, + Container::Array(array) => { + let mut r = 0; + let mut prev = None; + for value in array.iter().copied() { + if value.checked_sub(1) != prev { + r += 1; + } + prev = Some(value); + } + if 2 + 4 * r < 2 * array.len() + 2 { + let array = std::mem::replace(array, Vec::new()); + *self = Container::Run(Vec::new()); + for value in array { + self.push(value); + } + true + } else { + false + } + } + Container::Run(_) => true, + } + } + } + let mut key: u16; + let mut domain_iter = domain.into_iter().copied().peekable(); + let mut has_run = false; + while let Some(entry) = domain_iter.next() { + key = (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); + let value: u16 = (entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit"); + let mut container = Container::Array(vec![value]); + while let Some(entry) = domain_iter.peek().copied() { + let entry_key: u16 = + (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); + if entry_key != key { + break; + } + domain_iter.next().expect("peeking just succeeded"); + container + .push((entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit")); + } + keys.push(key); + has_run = container.try_make_run() || has_run; + containers.push(container); + } + // https://github.com/RoaringBitmap/RoaringFormatSpec + use byteorder::{WriteBytesExt, LE}; + const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; + const SERIAL_COOKIE: u32 = 12347; + const NO_OFFSET_THRESHOLD: u32 = 4; + let size: u32 = containers.len().try_into().unwrap(); + let start_offset = if has_run { + out.write_u32::(SERIAL_COOKIE | ((size - 1) << 16))?; + for set in containers.chunks(8) { + let mut b = 0; + for (i, container) in set.iter().enumerate() { + if 
matches!(container, &Container::Run(..)) { + b |= 1 << i; + } + } + out.write_u8(b)?; + } + if size < NO_OFFSET_THRESHOLD { + 4 + 4 * size + ((size + 7) / 8) + } else { + 4 + 8 * size + ((size + 7) / 8) + } + } else { + out.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; + out.write_u32::(containers.len().try_into().unwrap())?; + 4 + 4 + 4 * size + 4 * size + }; + for (&key, container) in keys.iter().zip(&containers) { + // descriptive header + let key: u32 = key.into(); + let count: u32 = container.popcount() - 1; + out.write_u32::((count << 16) | key)?; + } + if !has_run || size >= NO_OFFSET_THRESHOLD { + // offset header + let mut starting_offset = start_offset; + for container in &containers { + out.write_u32::(starting_offset)?; + starting_offset += match container { + Container::Bits(_) => 8192u32, + Container::Array(array) => u32::try_from(array.len()).unwrap() * 2, + Container::Run(runs) => 2 + u32::try_from(runs.len()).unwrap() * 4, + }; + } + } + for container in &containers { + match container { + Container::Bits(bits) => { + for chunk in bits.iter() { + out.write_u64::(*chunk)?; + } + } + Container::Array(array) => { + for value in array.iter() { + out.write_u16::(*value)?; + } + } + Container::Run(runs) => { + out.write_u16::((runs.len()).try_into().unwrap())?; + for (start, lenm1) in runs.iter().copied() { + out.write_u16::(start)?; + out.write_u16::(lenm1)?; + } + } + } + } + Ok(()) +} + pub(crate) fn get_function_type_for_search<'tcx>( item: &clean::Item, tcx: TyCtxt<'tcx>, diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 15da5bf96b2ca..e70c5bfd734bf 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -242,6 +242,14 @@ function initSearch(rawSearchIndex) { * @type {Array} */ let searchIndex; + /** + * @type {Map} + */ + let searchIndexDeprecated; + /** + * @type {Map} + */ + let searchIndexEmptyDesc; /** * @type {Uint32Array} */ @@ -1326,7 +1334,6 @@ 
function initSearch(rawSearchIndex) { duplicates.add(obj.fullPath); obj.href = res[1]; - obj.desc = result.desc; out.push(obj); if (out.length >= MAX_RESULTS) { break; @@ -1353,12 +1360,6 @@ function initSearch(rawSearchIndex) { result.word = searchIndex[result.id].word; result_list.push(result); } - for (const result of result_list) { - result.desc = searchState.loadDesc(result.item); - } - for (const result of result_list) { - result.desc = await result.desc; - } result_list.sort((aaa, bbb) => { let a, b; @@ -1401,8 +1402,8 @@ function initSearch(rawSearchIndex) { } // sort deprecated items later - a = aaa.item.deprecated; - b = bbb.item.deprecated; + a = searchIndexDeprecated.get(aaa.item.crate).contains(aaa.item.bitIndex); + b = searchIndexDeprecated.get(bbb.item.crate).contains(bbb.item.bitIndex); if (a !== b) { return a - b; } @@ -1429,8 +1430,8 @@ function initSearch(rawSearchIndex) { } // sort by description (no description goes later) - a = (aaa.desc === ""); - b = (bbb.desc === ""); + a = searchIndexEmptyDesc.get(aaa.item.crate).contains(aaa.item.bitIndex); + b = searchIndexEmptyDesc.get(bbb.item.crate).contains(bbb.item.bitIndex); if (a !== b) { return a - b; } @@ -1453,7 +1454,16 @@ function initSearch(rawSearchIndex) { return 0; }); - return transformResults(result_list); + const transformed = transformResults(result_list); + for (const result of transformed) { + result.desc = searchIndexEmptyDesc.get(result.crate).contains(result.bitIndex) ? 
+ "" : + searchState.loadDesc(result); + } + for (const result of transformed) { + result.desc = await result.desc; + } + return transformed; } /** @@ -2079,7 +2089,7 @@ function initSearch(rawSearchIndex) { parent: item.parent, type: item.type, is_alias: true, - deprecated: item.deprecated, + bitIndex: item.bitIndex, implDisambiguator: item.implDisambiguator, }; } @@ -2712,9 +2722,11 @@ ${item.displayPath}${name}\ currentResults = results.query.userQuery; - const ret_others = await addTab(results.others, results.query, true); - const ret_in_args = await addTab(results.in_args, results.query, false); - const ret_returned = await addTab(results.returned, results.query, false); + const [ret_others, ret_in_args, ret_returned] = await Promise.all([ + addTab(results.others, results.query, true), + addTab(results.in_args, results.query, false), + addTab(results.returned, results.query, false), + ]); // Navigate to the relevant tab if the current tab is empty, like in case users search // for "-> String". If they had selected another tab previously, they have to click on @@ -3267,6 +3279,123 @@ ${item.displayPath}${name}\ return result; } } + class RoaringBitmap { + constructor(str) { + const strdecoded = atob(str); + const u8array = new Uint8Array(strdecoded.length); + for (let j = 0; j < strdecoded.length; ++j) { + u8array[j] = strdecoded.charCodeAt(j); + } + const has_runs = u8array[0] === 0x3b; + const size = has_runs ? + ((u8array[2] | (u8array[3] << 8)) + 1) : + ((u8array[4] | (u8array[5] << 8) | (u8array[6] << 16) | (u8array[7] << 24))); + let i = has_runs ? 
4 : 8; + let is_run; + if (has_runs) { + const is_run_len = Math.floor((size + 7) / 8); + is_run = u8array.slice(i, i + is_run_len); + i += is_run_len; + } else { + is_run = new Uint8Array(); + } + this.keys = []; + this.cardinalities = []; + for (let j = 0; j < size; ++j) { + this.keys.push(u8array[i] | (u8array[i + 1] << 8)); + i += 2; + this.cardinalities.push((u8array[i] | (u8array[i + 1] << 8)) + 1); + i += 2; + } + this.containers = []; + let offsets = null; + if (!has_runs || this.keys.length >= 4) { + offsets = []; + for (let j = 0; j < size; ++j) { + offsets.push(u8array[i] | (u8array[i + 1] << 8) | (u8array[i + 2] << 16) | + (u8array[i + 3] << 24)); + i += 4; + } + } + for (let j = 0; j < size; ++j) { + if (offsets && offsets[j] !== i) { + console.log(this.containers); + throw new Error(`corrupt bitmap ${j}: ${i} / ${offsets[j]}`); + } + if (is_run[j >> 3] & (1 << (j & 0x7))) { + const runcount = (u8array[i] | (u8array[i + 1] << 8)); + i += 2; + this.containers.push(new RoaringBitmapRun( + runcount, + u8array.slice(i, i + (runcount * 4)), + )); + i += runcount * 4; + } else if (this.cardinalities[j] >= 4096) { + this.containers.push(new RoaringBitmapBits(u8array.slice(i, i + 8192))); + i += 8192; + } else { + const end = this.cardinalities[j] * 2; + this.containers.push(new RoaringBitmapArray( + this.cardinalities[j], + u8array.slice(i, i + end), + )); + i += end; + } + } + } + contains(keyvalue) { + const key = keyvalue >> 16; + const value = keyvalue & 0xFFFF; + for (let i = 0; i < this.keys.length; ++i) { + if (this.keys[i] === key) { + return this.containers[i].contains(value); + } + } + return false; + } + } + + class RoaringBitmapRun { + constructor(runcount, array) { + this.runcount = runcount; + this.array = array; + } + contains(value) { + const l = this.runcount * 4; + for (let i = 0; i < l; i += 4) { + const start = this.array[i] | (this.array[i + 1] << 8); + const lenm1 = this.array[i + 2] | (this.array[i + 3] << 8); + if (value >= start && 
value <= (start + lenm1)) { + return true; + } + } + return false; + } + } + class RoaringBitmapArray { + constructor(cardinality, array) { + this.cardinality = cardinality; + this.array = array; + } + contains(value) { + const l = this.cardinality * 2; + for (let i = 0; i < l; i += 2) { + const start = this.array[i] | (this.array[i + 1] << 8); + if (value === start) { + return true; + } + } + return false; + } + } + class RoaringBitmapBits { + constructor(array) { + this.array = array; + } + contains(value) { + return !!(this.array[value >> 3] & (1 << (value & 7))); + } + } /** * Convert raw search index into in-memory search index. @@ -3275,6 +3404,8 @@ ${item.displayPath}${name}\ */ function buildIndex(rawSearchIndex) { searchIndex = []; + searchIndexDeprecated = new Map(); + searchIndexEmptyDesc = new Map(); const charA = "A".charCodeAt(0); let currentIndex = 0; let id = 0; @@ -3307,6 +3438,11 @@ ${item.displayPath}${name}\ }; const descShardList = [ descShard ]; + // Deprecated items and items with no description + searchIndexDeprecated.set(crate, new RoaringBitmap(crateCorpus.c)); + searchIndexEmptyDesc.set(crate, new RoaringBitmap(crateCorpus.e)); + let descIndex = 0; + // This object should have exactly the same set of fields as the "row" // object defined below. Your JavaScript runtime will thank you. // https://mathiasbynens.be/notes/shapes-ics @@ -3316,18 +3452,21 @@ ${item.displayPath}${name}\ name: crate, path: "", descShard, - descIndex: 0, + descIndex, parent: undefined, type: null, id, word: crate, normalizedName: crate.indexOf("_") === -1 ? 
crate : crate.replace(/_/g, ""), - deprecated: null, + bitIndex: 0, implDisambiguator: null, }; id += 1; searchIndex.push(crateRow); currentIndex += 1; + if (!searchIndexEmptyDesc.get(crate).contains(0)) { + descIndex += 1; + } // a String of one character item type codes const itemTypes = crateCorpus.t; @@ -3341,9 +3480,7 @@ ${item.displayPath}${name}\ const itemPaths = new Map(crateCorpus.q); // an array of (Number) the parent path index + 1 to `paths`, or 0 if none const itemParentIdxs = crateCorpus.i; - // an array of (Number) indices for the deprecated items - const deprecatedItems = new Set(crateCorpus.c); - // an array of (Number) indices for the deprecated items + // a map Number, string for impl disambiguators const implDisambiguator = new Map(crateCorpus.b); // an array of [(Number) item type, // (String) name] @@ -3388,9 +3525,10 @@ ${item.displayPath}${name}\ // faster analysis operations lastPath = ""; len = itemTypes.length; - let descIndex = 1; for (let i = 0; i < len; ++i) { - if (descIndex >= descShard.len) { + const bitIndex = i + 1; + if (descIndex >= descShard.len && + !searchIndexEmptyDesc.get(crate).contains(bitIndex)) { descShard = { crate, shard: descShard.shard + 1, @@ -3439,13 +3577,15 @@ ${item.displayPath}${name}\ id, word, normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""), - deprecated: deprecatedItems.has(i), + bitIndex, implDisambiguator: implDisambiguator.has(i) ? 
implDisambiguator.get(i) : null, }; id += 1; searchIndex.push(row); lastPath = row.path; - descIndex += 1; + if (!searchIndexEmptyDesc.get(crate).contains(bitIndex)) { + descIndex += 1; + } } if (aliases) { diff --git a/src/tools/rustdoc-js/tester.js b/src/tools/rustdoc-js/tester.js index 1af2f44c230d6..43a22f358c31f 100644 --- a/src/tools/rustdoc-js/tester.js +++ b/src/tools/rustdoc-js/tester.js @@ -559,8 +559,3 @@ process.on("beforeExit", () => { console.log("process did not complete"); process.exit(1); }); - -/*process.on("uncaughtException", (err) => { - console.log(`Uncaught Exception: ${err.message}`); - process.exit(1); -});*/ From c65f7d8ff181217e96662ec0ca4e5e6d9507d3dc Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 22 Mar 2024 17:00:38 -0700 Subject: [PATCH 6/7] rustdoc-search: address nits --- src/librustdoc/html/render/mod.rs | 2 +- src/librustdoc/html/render/search_index.rs | 301 +++--------------- .../html/render/search_index/encode.rs | 245 ++++++++++++++ src/librustdoc/html/static/js/main.js | 4 + src/librustdoc/html/static/js/search.js | 10 +- 5 files changed, 297 insertions(+), 265 deletions(-) create mode 100644 src/librustdoc/html/render/search_index/encode.rs diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index c1a7593c26f08..0c92dbd90f180 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -192,7 +192,7 @@ impl RenderTypeId { RenderTypeId::Index(idx) => (*idx).try_into().unwrap(), _ => panic!("must convert render types to indexes before serializing"), }; - search_index::write_vlqhex_to_string(id, string); + search_index::encode::write_vlqhex_to_string(id, string); } } diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 2ec22df0b434f..60a3ada3b7c76 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -1,7 +1,8 @@ +pub(crate) mod encode; + use 
std::collections::hash_map::Entry; use std::collections::{BTreeMap, VecDeque}; -use base64::prelude::*; use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_middle::ty::TyCtxt; use rustc_span::def_id::DefId; @@ -18,12 +19,33 @@ use crate::html::format::join_with_double_colon; use crate::html::markdown::short_markdown_summary; use crate::html::render::{self, IndexItem, IndexItemFunctionType, RenderType, RenderTypeId}; +use encode::{bitmap_to_string, write_vlqhex_to_string}; + /// The serialized search description sharded version /// /// The `index` is a JSON-encoded list of names and other information. /// /// The desc has newlined descriptions, split up by size into 128KiB shards. /// For example, `(4, "foo\nbar\nbaz\nquux")`. +/// +/// There is no single, optimal size for these shards, because it depends on +/// configuration values that we can't predict or control, such as the version +/// of HTTP used (HTTP/1.1 would work better with larger files, while HTTP/2 +/// and 3 are more agnostic), transport compression (gzip, zstd, etc), whether +/// the search query is going to produce a large number of results or a small +/// number, the bandwidth delay product of the network... +/// +/// Gzipping some standard library descriptions to guess what transport +/// compression will do, the compressed file sizes can be as small as 4.9KiB +/// or as large as 18KiB (ignoring the final 1.9KiB shard of leftovers). +/// A "reasonable" range for files is for them to be bigger than 1KiB, +/// since that's about the amount of data that can be transferred in a +/// single TCP packet, and 64KiB, the maximum amount of data that +/// TCP can transfer in a single round trip without extensions. 
+/// +/// [1]: https://en.wikipedia.org/wiki/Maximum_transmission_unit#MTUs_for_common_media +/// [2]: https://en.wikipedia.org/wiki/Sliding_window_protocol#Basic_concept +/// [3]: https://learn.microsoft.com/en-us/troubleshoot/windows-server/networking/description-tcp-features pub(crate) struct SerializedSearchIndex { pub(crate) index: String, pub(crate) desc: Vec<(usize, String)>, @@ -342,9 +364,9 @@ pub(crate) fn build_index<'tcx>( associated_item_disambiguators: &'a Vec<(usize, String)>, // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string // for information on the format. - descindex: String, + desc_index: String, // A list of items with no description. This is eventually turned into a bitmap. - emptydesc: Vec, + empty_desc: Vec, } struct Paths { @@ -476,19 +498,11 @@ pub(crate) fn build_index<'tcx>( crate_data.serialize_field("q", &full_paths)?; crate_data.serialize_field("i", &parents)?; crate_data.serialize_field("f", &functions)?; - crate_data.serialize_field("D", &self.descindex)?; + crate_data.serialize_field("D", &self.desc_index)?; crate_data.serialize_field("p", &paths)?; crate_data.serialize_field("b", &self.associated_item_disambiguators)?; - let mut buf = Vec::new(); - let mut strbuf = String::new(); - write_bitmap_to_bytes(&deprecated, &mut buf).unwrap(); - BASE64_STANDARD.encode_string(&buf, &mut strbuf); - crate_data.serialize_field("c", &strbuf)?; - strbuf.clear(); - buf.clear(); - write_bitmap_to_bytes(&self.emptydesc, &mut buf).unwrap(); - BASE64_STANDARD.encode_string(&buf, &mut strbuf); - crate_data.serialize_field("e", &strbuf)?; + crate_data.serialize_field("c", &bitmap_to_string(&deprecated))?; + crate_data.serialize_field("e", &bitmap_to_string(&self.empty_desc))?; if has_aliases { crate_data.serialize_field("a", &self.aliases)?; } @@ -496,16 +510,16 @@ pub(crate) fn build_index<'tcx>( } } - let (emptydesc, desc) = { - let mut emptydesc = Vec::new(); + let (empty_desc, desc) = { + let mut empty_desc = 
Vec::new(); let mut result = Vec::new(); let mut set = String::new(); let mut len: usize = 0; - let mut itemindex: u32 = 0; + let mut item_index: u32 = 0; for desc in std::iter::once(&crate_doc).chain(crate_items.iter().map(|item| &item.desc)) { if desc == "" { - emptydesc.push(itemindex); - itemindex += 1; + empty_desc.push(item_index); + item_index += 1; continue; } if set.len() >= DESC_INDEX_SHARD_LEN { @@ -516,23 +530,23 @@ pub(crate) fn build_index<'tcx>( } set.push_str(&desc); len += 1; - itemindex += 1; + item_index += 1; } result.push((len, std::mem::replace(&mut set, String::new()))); - (emptydesc, result) + (empty_desc, result) }; - let descindex = { - let mut descindex = String::with_capacity(desc.len() * 4); + let desc_index = { + let mut desc_index = String::with_capacity(desc.len() * 4); for &(len, _) in desc.iter() { - write_vlqhex_to_string(len.try_into().unwrap(), &mut descindex); + write_vlqhex_to_string(len.try_into().unwrap(), &mut desc_index); } - descindex + desc_index }; assert_eq!( crate_items.len() + 1, - desc.iter().map(|(len, _)| *len).sum::() + emptydesc.len() + desc.iter().map(|(len, _)| *len).sum::() + empty_desc.len() ); // The index, which is actually used to search, is JSON @@ -546,8 +560,8 @@ pub(crate) fn build_index<'tcx>( paths: crate_paths, aliases: &aliases, associated_item_disambiguators: &associated_item_disambiguators, - descindex, - emptydesc, + desc_index, + empty_desc, }) .expect("failed serde conversion") // All these `replace` calls are because we have to go through JS string for JSON content. 
@@ -559,237 +573,6 @@ pub(crate) fn build_index<'tcx>( SerializedSearchIndex { index, desc } } -pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) { - let (sign, magnitude): (bool, u32) = - if n >= 0 { (false, n.try_into().unwrap()) } else { (true, (-n).try_into().unwrap()) }; - // zig-zag encoding - let value: u32 = (magnitude << 1) | (if sign { 1 } else { 0 }); - // Self-terminating hex use capital letters for everything but the - // least significant digit, which is lowercase. For example, decimal 17 - // would be `` Aa `` if zig-zag encoding weren't used. - // - // Zig-zag encoding, however, stores the sign bit as the last bit. - // This means, in the last hexit, 1 is actually `c`, -1 is `b` - // (`a` is the imaginary -0), and, because all the bits are shifted - // by one, `` A` `` is actually 8 and `` Aa `` is -8. - // - // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html - // describes the encoding in more detail. - let mut shift: u32 = 28; - let mut mask: u32 = 0xF0_00_00_00; - // first skip leading zeroes - while shift < 32 { - let hexit = (value & mask) >> shift; - if hexit != 0 || shift == 0 { - break; - } - shift = shift.wrapping_sub(4); - mask = mask >> 4; - } - // now write the rest - while shift < 32 { - let hexit = (value & mask) >> shift; - let hex = char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap(); - string.push(hex); - shift = shift.wrapping_sub(4); - mask = mask >> 4; - } -} - -// checked against roaring-rs in -// https://gitlab.com/notriddle/roaring-test -pub fn write_bitmap_to_bytes(domain: &[u32], mut out: impl std::io::Write) -> std::io::Result<()> { - // https://arxiv.org/pdf/1603.06549.pdf - let mut keys = Vec::::new(); - let mut containers = Vec::::new(); - enum Container { - /// number of ones, bits - Bits(Box<[u64; 1024]>), - /// list of entries - Array(Vec), - /// list of (start, len-1) - Run(Vec<(u16, u16)>), - } - impl Container { - fn popcount(&self) -> u32 { - match 
self { - Container::Bits(bits) => bits.iter().copied().map(|x| x.count_ones()).sum(), - Container::Array(array) => { - array.len().try_into().expect("array can't be bigger than 2**32") - } - Container::Run(runs) => { - runs.iter().copied().map(|(_, lenm1)| u32::from(lenm1) + 1).sum() - } - } - } - fn push(&mut self, value: u16) { - match self { - Container::Bits(bits) => bits[value as usize >> 6] |= 1 << (value & 0x3F), - Container::Array(array) => { - array.push(value); - if array.len() >= 4096 { - let array = std::mem::replace(array, Vec::new()); - *self = Container::Bits(Box::new([0; 1024])); - for value in array { - self.push(value); - } - } - } - Container::Run(runs) => { - if let Some(r) = runs.last_mut() - && r.0 + r.1 + 1 == value - { - r.1 += 1; - } else { - runs.push((value, 0)); - } - } - } - } - fn try_make_run(&mut self) -> bool { - match self { - Container::Bits(bits) => { - let mut r: u64 = 0; - for (i, chunk) in bits.iter().copied().enumerate() { - let next_chunk = - i.checked_add(1).and_then(|i| bits.get(i)).copied().unwrap_or(0); - r += !chunk & u64::from((chunk << 1).count_ones()); - r += !next_chunk & u64::from((chunk >> 63).count_ones()); - } - if (2 + 4 * r) < 8192 { - let bits = std::mem::replace(bits, Box::new([0; 1024])); - *self = Container::Run(Vec::new()); - for (i, bits) in bits.iter().copied().enumerate() { - if bits == 0 { - continue; - } - for j in 0..64 { - let value = (u16::try_from(i).unwrap() << 6) | j; - if bits & (1 << j) != 0 { - self.push(value); - } - } - } - true - } else { - false - } - } - Container::Array(array) if array.len() <= 5 => false, - Container::Array(array) => { - let mut r = 0; - let mut prev = None; - for value in array.iter().copied() { - if value.checked_sub(1) != prev { - r += 1; - } - prev = Some(value); - } - if 2 + 4 * r < 2 * array.len() + 2 { - let array = std::mem::replace(array, Vec::new()); - *self = Container::Run(Vec::new()); - for value in array { - self.push(value); - } - true - } else { - 
false - } - } - Container::Run(_) => true, - } - } - } - let mut key: u16; - let mut domain_iter = domain.into_iter().copied().peekable(); - let mut has_run = false; - while let Some(entry) = domain_iter.next() { - key = (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); - let value: u16 = (entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit"); - let mut container = Container::Array(vec![value]); - while let Some(entry) = domain_iter.peek().copied() { - let entry_key: u16 = - (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); - if entry_key != key { - break; - } - domain_iter.next().expect("peeking just succeeded"); - container - .push((entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit")); - } - keys.push(key); - has_run = container.try_make_run() || has_run; - containers.push(container); - } - // https://github.com/RoaringBitmap/RoaringFormatSpec - use byteorder::{WriteBytesExt, LE}; - const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; - const SERIAL_COOKIE: u32 = 12347; - const NO_OFFSET_THRESHOLD: u32 = 4; - let size: u32 = containers.len().try_into().unwrap(); - let start_offset = if has_run { - out.write_u32::(SERIAL_COOKIE | ((size - 1) << 16))?; - for set in containers.chunks(8) { - let mut b = 0; - for (i, container) in set.iter().enumerate() { - if matches!(container, &Container::Run(..)) { - b |= 1 << i; - } - } - out.write_u8(b)?; - } - if size < NO_OFFSET_THRESHOLD { - 4 + 4 * size + ((size + 7) / 8) - } else { - 4 + 8 * size + ((size + 7) / 8) - } - } else { - out.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; - out.write_u32::(containers.len().try_into().unwrap())?; - 4 + 4 + 4 * size + 4 * size - }; - for (&key, container) in keys.iter().zip(&containers) { - // descriptive header - let key: u32 = key.into(); - let count: u32 = container.popcount() - 1; - out.write_u32::((count << 16) | key)?; - } - if !has_run || size >= NO_OFFSET_THRESHOLD { - // 
offset header - let mut starting_offset = start_offset; - for container in &containers { - out.write_u32::(starting_offset)?; - starting_offset += match container { - Container::Bits(_) => 8192u32, - Container::Array(array) => u32::try_from(array.len()).unwrap() * 2, - Container::Run(runs) => 2 + u32::try_from(runs.len()).unwrap() * 4, - }; - } - } - for container in &containers { - match container { - Container::Bits(bits) => { - for chunk in bits.iter() { - out.write_u64::(*chunk)?; - } - } - Container::Array(array) => { - for value in array.iter() { - out.write_u16::(*value)?; - } - } - Container::Run(runs) => { - out.write_u16::((runs.len()).try_into().unwrap())?; - for (start, lenm1) in runs.iter().copied() { - out.write_u16::(start)?; - out.write_u16::(lenm1)?; - } - } - } - } - Ok(()) -} - pub(crate) fn get_function_type_for_search<'tcx>( item: &clean::Item, tcx: TyCtxt<'tcx>, diff --git a/src/librustdoc/html/render/search_index/encode.rs b/src/librustdoc/html/render/search_index/encode.rs new file mode 100644 index 0000000000000..af5eccd5bedc6 --- /dev/null +++ b/src/librustdoc/html/render/search_index/encode.rs @@ -0,0 +1,245 @@ +use base64::prelude::*; + +pub(crate) fn write_vlqhex_to_string(n: i32, string: &mut String) { + let (sign, magnitude): (bool, u32) = + if n >= 0 { (false, n.try_into().unwrap()) } else { (true, (-n).try_into().unwrap()) }; + // zig-zag encoding + let value: u32 = (magnitude << 1) | (if sign { 1 } else { 0 }); + // Self-terminating hex use capital letters for everything but the + // least significant digit, which is lowercase. For example, decimal 17 + // would be `` Aa `` if zig-zag encoding weren't used. + // + // Zig-zag encoding, however, stores the sign bit as the last bit. + // This means, in the last hexit, 1 is actually `c`, -1 is `b` + // (`a` is the imaginary -0), and, because all the bits are shifted + // by one, `` A` `` is actually 8 and `` Aa `` is -8. 
+ // + // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html + // describes the encoding in more detail. + let mut shift: u32 = 28; + let mut mask: u32 = 0xF0_00_00_00; + // first skip leading zeroes + while shift < 32 { + let hexit = (value & mask) >> shift; + if hexit != 0 || shift == 0 { + break; + } + shift = shift.wrapping_sub(4); + mask = mask >> 4; + } + // now write the rest + while shift < 32 { + let hexit = (value & mask) >> shift; + let hex = char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap(); + string.push(hex); + shift = shift.wrapping_sub(4); + mask = mask >> 4; + } +} + +// Used during bitmap encoding +enum Container { + /// number of ones, bits + Bits(Box<[u64; 1024]>), + /// list of entries + Array(Vec), + /// list of (start, len-1) + Run(Vec<(u16, u16)>), +} +impl Container { + fn popcount(&self) -> u32 { + match self { + Container::Bits(bits) => bits.iter().copied().map(|x| x.count_ones()).sum(), + Container::Array(array) => { + array.len().try_into().expect("array can't be bigger than 2**32") + } + Container::Run(runs) => { + runs.iter().copied().map(|(_, lenm1)| u32::from(lenm1) + 1).sum() + } + } + } + fn push(&mut self, value: u16) { + match self { + Container::Bits(bits) => bits[value as usize >> 6] |= 1 << (value & 0x3F), + Container::Array(array) => { + array.push(value); + if array.len() >= 4096 { + let array = std::mem::replace(array, Vec::new()); + *self = Container::Bits(Box::new([0; 1024])); + for value in array { + self.push(value); + } + } + } + Container::Run(runs) => { + if let Some(r) = runs.last_mut() + && r.0 + r.1 + 1 == value + { + r.1 += 1; + } else { + runs.push((value, 0)); + } + } + } + } + fn try_make_run(&mut self) -> bool { + match self { + Container::Bits(bits) => { + let mut r: u64 = 0; + for (i, chunk) in bits.iter().copied().enumerate() { + let next_chunk = + i.checked_add(1).and_then(|i| bits.get(i)).copied().unwrap_or(0); + r += !chunk & u64::from((chunk << 
1).count_ones()); + r += !next_chunk & u64::from((chunk >> 63).count_ones()); + } + if (2 + 4 * r) < 8192 { + let bits = std::mem::replace(bits, Box::new([0; 1024])); + *self = Container::Run(Vec::new()); + for (i, bits) in bits.iter().copied().enumerate() { + if bits == 0 { + continue; + } + for j in 0..64 { + let value = (u16::try_from(i).unwrap() << 6) | j; + if bits & (1 << j) != 0 { + self.push(value); + } + } + } + true + } else { + false + } + } + Container::Array(array) if array.len() <= 5 => false, + Container::Array(array) => { + let mut r = 0; + let mut prev = None; + for value in array.iter().copied() { + if value.checked_sub(1) != prev { + r += 1; + } + prev = Some(value); + } + if 2 + 4 * r < 2 * array.len() + 2 { + let array = std::mem::replace(array, Vec::new()); + *self = Container::Run(Vec::new()); + for value in array { + self.push(value); + } + true + } else { + false + } + } + Container::Run(_) => true, + } + } +} + +// checked against roaring-rs in +// https://gitlab.com/notriddle/roaring-test +pub(crate) fn write_bitmap_to_bytes( + domain: &[u32], + mut out: impl std::io::Write, +) -> std::io::Result<()> { + // https://arxiv.org/pdf/1603.06549.pdf + let mut keys = Vec::::new(); + let mut containers = Vec::::new(); + let mut key: u16; + let mut domain_iter = domain.into_iter().copied().peekable(); + let mut has_run = false; + while let Some(entry) = domain_iter.next() { + key = (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); + let value: u16 = (entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit"); + let mut container = Container::Array(vec![value]); + while let Some(entry) = domain_iter.peek().copied() { + let entry_key: u16 = + (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit"); + if entry_key != key { + break; + } + domain_iter.next().expect("peeking just succeeded"); + container + .push((entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should 
fit")); + } + keys.push(key); + has_run = container.try_make_run() || has_run; + containers.push(container); + } + // https://github.com/RoaringBitmap/RoaringFormatSpec + use byteorder::{WriteBytesExt, LE}; + const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; + const SERIAL_COOKIE: u32 = 12347; + const NO_OFFSET_THRESHOLD: u32 = 4; + let size: u32 = containers.len().try_into().unwrap(); + let start_offset = if has_run { + out.write_u32::(SERIAL_COOKIE | ((size - 1) << 16))?; + for set in containers.chunks(8) { + let mut b = 0; + for (i, container) in set.iter().enumerate() { + if matches!(container, &Container::Run(..)) { + b |= 1 << i; + } + } + out.write_u8(b)?; + } + if size < NO_OFFSET_THRESHOLD { + 4 + 4 * size + ((size + 7) / 8) + } else { + 4 + 8 * size + ((size + 7) / 8) + } + } else { + out.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; + out.write_u32::(containers.len().try_into().unwrap())?; + 4 + 4 + 4 * size + 4 * size + }; + for (&key, container) in keys.iter().zip(&containers) { + // descriptive header + let key: u32 = key.into(); + let count: u32 = container.popcount() - 1; + out.write_u32::((count << 16) | key)?; + } + if !has_run || size >= NO_OFFSET_THRESHOLD { + // offset header + let mut starting_offset = start_offset; + for container in &containers { + out.write_u32::(starting_offset)?; + starting_offset += match container { + Container::Bits(_) => 8192u32, + Container::Array(array) => u32::try_from(array.len()).unwrap() * 2, + Container::Run(runs) => 2 + u32::try_from(runs.len()).unwrap() * 4, + }; + } + } + for container in &containers { + match container { + Container::Bits(bits) => { + for chunk in bits.iter() { + out.write_u64::(*chunk)?; + } + } + Container::Array(array) => { + for value in array.iter() { + out.write_u16::(*value)?; + } + } + Container::Run(runs) => { + out.write_u16::((runs.len()).try_into().unwrap())?; + for (start, lenm1) in runs.iter().copied() { + out.write_u16::(start)?; + out.write_u16::(lenm1)?; + } + } + } + } + 
Ok(()) +} + +pub(crate) fn bitmap_to_string(domain: &[u32]) -> String { + let mut buf = Vec::new(); + let mut strbuf = String::new(); + write_bitmap_to_bytes(&domain, &mut buf).unwrap(); + BASE64_STANDARD.encode_string(&buf, &mut strbuf); + strbuf +} diff --git a/src/librustdoc/html/static/js/main.js b/src/librustdoc/html/static/js/main.js index eff3dc27911bf..940b62be0c94d 100644 --- a/src/librustdoc/html/static/js/main.js +++ b/src/librustdoc/html/static/js/main.js @@ -333,6 +333,10 @@ function preLoadCss(cssUrl) { loadDesc: async function({descShard, descIndex}) { if (descShard.promise === null) { descShard.promise = new Promise((resolve, reject) => { + // The `resolve` callback is stored in the `descShard` + // object, which is itself stored in `this.descShards` map. + // It is called in `loadedDescShard` by the + // search.desc script. descShard.resolve = resolve; const ds = descShard; const fname = `${ds.crate}-desc-${ds.shard}-`; diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index e70c5bfd734bf..3daf1ad22ded3 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1455,13 +1455,13 @@ function initSearch(rawSearchIndex) { }); const transformed = transformResults(result_list); - for (const result of transformed) { - result.desc = searchIndexEmptyDesc.get(result.crate).contains(result.bitIndex) ? + const descs = await Promise.all(transformed.map(result => { + return searchIndexEmptyDesc.get(result.crate).contains(result.bitIndex) ? 
"" : searchState.loadDesc(result); - } - for (const result of transformed) { - result.desc = await result.desc; + })); + for (const [i, result] of transformed.entries()) { + result.desc = descs[i]; } return transformed; } From a272007a209a4327ad8122af1ad9f9620f04724e Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 2 Apr 2024 07:55:52 -0700 Subject: [PATCH 7/7] Clean up src/librustdoc/html/render/search_index/encode.rs Co-authored-by: Guillaume Gomez --- .../html/render/search_index/encode.rs | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/librustdoc/html/render/search_index/encode.rs b/src/librustdoc/html/render/search_index/encode.rs index af5eccd5bedc6..54407c614c4c7 100644 --- a/src/librustdoc/html/render/search_index/encode.rs +++ b/src/librustdoc/html/render/search_index/encode.rs @@ -92,24 +92,23 @@ impl Container { r += !chunk & u64::from((chunk << 1).count_ones()); r += !next_chunk & u64::from((chunk >> 63).count_ones()); } - if (2 + 4 * r) < 8192 { - let bits = std::mem::replace(bits, Box::new([0; 1024])); - *self = Container::Run(Vec::new()); - for (i, bits) in bits.iter().copied().enumerate() { - if bits == 0 { - continue; - } - for j in 0..64 { - let value = (u16::try_from(i).unwrap() << 6) | j; - if bits & (1 << j) != 0 { - self.push(value); - } + if (2 + 4 * r) >= 8192 { + return false; + } + let bits = std::mem::replace(bits, Box::new([0; 1024])); + *self = Container::Run(Vec::new()); + for (i, bits) in bits.iter().copied().enumerate() { + if bits == 0 { + continue; + } + for j in 0..64 { + let value = (u16::try_from(i).unwrap() << 6) | j; + if bits & (1 << j) != 0 { + self.push(value); } } - true - } else { - false } + true } Container::Array(array) if array.len() <= 5 => false, Container::Array(array) => { @@ -121,16 +120,15 @@ impl Container { } prev = Some(value); } - if 2 + 4 * r < 2 * array.len() + 2 { - let array = std::mem::replace(array, Vec::new()); - *self = Container::Run(Vec::new()); - 
for value in array { - self.push(value); - } - true - } else { - false + if 2 + 4 * r >= 2 * array.len() + 2 { + return false; + } + let array = std::mem::replace(array, Vec::new()); + *self = Container::Run(Vec::new()); + for value in array { + self.push(value); } + true } Container::Run(_) => true, }