Pagefind searchUtils.ts

88.4% Statements 61/69
61.19% Branches 41/67
100% Functions 13/13
88.7% Lines 55/62
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32x
 
 
32x
 
 
6x
 
 
2x
2x
 
 
 
 
2x
2x
 
2x
 
2x
2x
2x
2x
2x
2x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14x
14x
14x
 
14x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18x
 
 
18x
4x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14x
7x
 
 
 
 
 
 
7x
 
 
 
 
14x
14x
20x
 
 
17x
34x
33x
33x
33x
33x
33x
 
 
 
17x
23x
7x
 
 
 
17x
14x
 
 
 
 
14x
3x
3x
3x
 
 
3x
 
 
 
14x
14x
14x
 
14x
 
 
14x
14x
 
 
 
14x
 
14x
 
 
 
14x
 
 
 
 
 
 
 
 
 
 
 
 
 
14x
 
  /**
 * @fileoverview Search result formatting utilities for Pagefind.
 * These functions transform raw Pagefind API responses into display-ready format.
 * @see https://pagefind.app/docs/api/
 */
 
import { isNumber } from 'lodash';
import type {
  PagefindSearchFragment, PagefindSubResult, PagefindSearchAnchor, FormattedSearchResult,
} from './types.js';
 
/**
 * Truncates an excerpt so that its first <mark> tag is visible near the start.
 *
 * - Excerpts without a <mark>, or whose text before the first <mark> is at most
 *   15 characters long, are returned unchanged (nothing needs to be cut).
 * - Otherwise only the last 15 characters before <mark> are considered:
 *   - if that window starts on a boundary character (whitespace, digit or
 *     punctuation), it is kept as is, without an ellipsis;
 *   - if it starts in the middle of a word, the partial word is dropped up to
 *     and including the first boundary character in the window, and the result
 *     is prefixed with "...";
 *   - if the window contains no boundary character at all, the whole window is
 *     kept and prefixed with "...".
 * - Everything from <mark> onwards is always kept (no limit).
 *
 * There is no dedicated HTML-entity handling. Because `;` counts as a boundary
 * character, a window that starts inside a named entity such as `&amp;` has the
 * dangling fragment removed by the partial-word rule above.
 *
 * @param excerpt - The raw excerpt from Pagefind
 * @returns Truncated excerpt with <mark> visible
 */
function truncateExcerptToShowMark(excerpt: string): string {
  const maxPrefixChars = 15;
  // Characters treated as a word boundary: whitespace, digits and punctuation.
  const boundaryPattern = /[\s\d_\-.,;:'"()[\]{}|\\/@#$%^&*!~`]/;

  const markStart = excerpt.indexOf('<mark>');

  // No mark found (-1) or mark already at the very start (0): nothing to trim.
  if (markStart <= 0) return excerpt;

  // The whole prefix fits within the limit, so no text has to be cut and the
  // excerpt already starts at its natural beginning.
  if (markStart <= maxPrefixChars) return excerpt;

  const fromMark = excerpt.substring(markStart);
  const tail = excerpt.substring(markStart - maxPrefixChars, markStart);

  // Window starts at a word boundary, no ellipsis needed
  if (boundaryPattern.test(tail.charAt(0))) {
    return tail + fromMark;
  }

  // Window starts mid-word: skip the partial word by cutting right after the
  // FIRST boundary character inside the window, so the remaining whole words
  // before <mark> stay visible.
  const boundaryIndex = tail.search(boundaryPattern);
  if (boundaryIndex !== -1) {
    return `...${tail.substring(boundaryIndex + 1)}${fromMark}`;
  }

  // Fallback: no boundary in the window at all, use ellipsis + truncated text
  return `...${tail}${fromMark}`;
}
 
/**
 * Merges consecutive <mark> tags into a single <mark> tag.
 * e.g., "<mark>making</mark> <mark>the</mark>" becomes "<mark>making the</mark>"
 * This ensures that terms grouped together in the excerpt are displayed as a single highlighted segment.
 *
 * Any run of whitespace between two marks is collapsed to a single space.
 * Marks that are directly adjacent (no whitespace between them) are joined
 * without inserting a space, so text that was split into several marks is not
 * broken apart: "<mark>in</mark><mark>stall</mark>" becomes "<mark>install</mark>".
 *
 * @param excerpt - The excerpt with potential consecutive <mark> tags
 * @returns Excerpt with merged <mark> tags
 */
function mergeConsecutiveMarks(excerpt: string): string {
  // `gap` is the whitespace captured between `</mark>` and `<mark>`; it is the
  // empty string when the two tags touch.
  return excerpt.replace(/<\/mark>(\s*)<mark>/g, (_match: string, gap: string) => (gap ? ' ' : ''));
}
 
/**
 * Parses a single sub-result (heading/section) within a page into a display-ready format.
 *
 * The returned entry:
 * - routes to the sub-result's own URL, falling back to the page URL when the
 *   sub-result has none;
 * - uses the sub-result's own title (empty string when missing);
 * - uses the sub-result's excerpt (falling back to the page excerpt, then to an
 *   empty string), truncated around the first <mark> and with consecutive
 *   <mark> tags merged;
 * - copies the remaining page metadata from `result.meta`;
 * - is flagged `isSubResult: true` and `isLastSubResult: false` (the caller is
 *   expected to flag the last entry itself).
 *
 * NOTE: no hierarchical (breadcrumb) title is built here; `anchors` is accepted
 * for signature compatibility but is not read by this function.
 *
 * @param sub - The sub-result from Pagefind
 * @param anchors - All anchor elements on the page (currently unused)
 * @param result - The parent Pagefind result
 * @returns Formatted search result for the sub-result
 * @see https://pagefind.app/docs/sub-results/
 */
function parseSubResult(
  sub: PagefindSubResult,
  anchors: PagefindSearchAnchor[],
  result: PagefindSearchFragment,
): FormattedSearchResult {
  // `||` (not `??`) on purpose: empty strings also fall through to the fallback.
  const rawExcerpt = sub.excerpt || result.excerpt || '';

  return {
    route: sub.url || result.url,
    meta: {
      ...result.meta,
      title: sub.title || '',
      description: mergeConsecutiveMarks(truncateExcerptToShowMark(rawExcerpt)),
    },
    result,
    isSubResult: true,
    isLastSubResult: false,
  };
}
 
/**
 * Builds the page-level (non-sub-result) entry for a Pagefind result.
 *
 * @param result - Raw Pagefind result for one page
 * @returns Display-ready entry that points at the page itself
 */
function toPageLevelResult(result: PagefindSearchFragment): FormattedSearchResult {
  return {
    route: result.url,
    meta: {
      ...result.meta,
      title: result.meta.title || '',
      description: mergeConsecutiveMarks(truncateExcerptToShowMark(result.excerpt || '')),
    },
    result,
    isSubResult: false,
    isLastSubResult: false,
  };
}

/**
 * Formats raw Pagefind search results for display in the UI.
 *
 * The returned array always starts with the page-level result. When the page
 * has sub-results, up to `count` of them follow, selected as described below.
 *
 * 1. **Sort by `balanced_score`**: Sorts weighted_locations by their `balanced_score` (descending),
 *    then by `weight` (descending), then by `location` (ascending) as a tie-breaker.
 *
 * 2. **Pick Top Sub-Results**: Iterates through the sorted locations and finds
 *    which sub-results contain each location. Sub-results whose title equals
 *    the page title are skipped. A sub-result spans from its first to its last
 *    location; a sub-result with a single location is treated as open-ended.
 *    If several sub-results contain the same location, the one with the most
 *    locations is kept (the later one on ties). Stops after collecting `count`
 *    distinct titles.
 *
 * 3. **Re-sort by Document Order**: Resorts the selected sub-results by their
 *    first location, so they appear in natural reading order.
 *
 * 4. **Deduplicate**: Removes entries whose formatted title is already present,
 *    then flags the final entry with `isLastSubResult`.
 *
 * @param result - Raw Pagefind result from `pagefind.search().results[i].data()`
 * @param count - Maximum number of sub-results to return per page (default: 10)
 * @returns Page-level result followed by the selected sub-results
 * @see https://pagefind.app/docs/api-reference
 * @see https://pagefind.app/docs/ranking/
 * @see https://pagefind.app/docs/sub-results/
 *
 * @example
 * ```typescript
 * const search = await pagefind.search("installation");
 * const results = await Promise.all(search.results.map(r => r.data()));
 * const formatted = results.flatMap(r => formatPagefindResult(r, 2));
 * // Each page contributes its page-level result plus up to 2 sub-results
 * ```
 */
export function formatPagefindResult(
  result: PagefindSearchFragment,
  count = 10,
): FormattedSearchResult[] {
  const { sub_results: subResults, anchors, weighted_locations: weightedLocations } = result;
  const pageResult = toPageLevelResult(result);

  // If no sub_results, return the main result as a non-sub-result
  if (!subResults || subResults.length === 0) {
    return [pageResult];
  }

  // Copy before sorting so the caller's array is not mutated; tolerate a
  // missing weighted_locations list instead of throwing on spread.
  const sortedLocations = [...(weightedLocations ?? [])].sort((a, b) => {
    if (b.balanced_score !== a.balanced_score) return b.balanced_score - a.balanced_score;
    if (b.weight !== a.weight) return b.weight - a.weight;
    // Equal score and weight: earlier position in the document comes first.
    return a.location - b.location;
  });

  // For each location (best first), find the sub-results that contain it and
  // keep one of them, until `count` sub-results have been collected.
  const subs: PagefindSubResult[] = [];
  sortedLocations.forEach(({ location }) => {
    if (subs.length >= count) return;

    // Find sub-results that contain this weighted location
    const candidates = subResults.filter((sub: PagefindSubResult) => {
      if (sub.title === result.meta.title) return false; // Skip page-level match
      const { locations } = sub;
      const [min] = locations || [];
      if (!isNumber(min)) return false;
      // A single-location sub-result has no known end, so treat it as open-ended.
      const max = locations.length === 1 ? Number.POSITIVE_INFINITY : locations[locations.length - 1];
      return min <= location && location <= max;
    });

    // Keep the sub-result with the most locations (most context)
    const best = candidates.reduce<PagefindSubResult | null>((prev, curr) => {
      if (!prev) return curr;
      return prev.locations.length > curr.locations.length ? prev : curr;
    }, null);

    // Add only unique sub-results
    if (best && !subs.some(existing => existing.title === best.title)) {
      subs.push(best);
    }
  });

  // Re-sort by document order (position in page).
  subs.sort((a, b) => {
    const [minA] = a.locations || [];
    const [minB] = b.locations || [];
    if (minA == null || minB == null) {
      return 0;
    }
    return minA - minB;
  });

  // Remove duplicate entries that may occur from overlapping matches.
  // Formatting maps missing titles to '', so titles are compared after formatting.
  const seenTitles = new Set<string>();
  const formattedSubResults = subs
    .map((sub: PagefindSubResult) => parseSubResult(sub, anchors, result))
    .filter((entry: FormattedSearchResult) => {
      if (seenTitles.has(entry.meta.title)) {
        return false;
      }
      seenTitles.add(entry.meta.title);
      return true;
    });

  // Mark the last sub-result
  formattedSubResults.forEach((entry, index) => {
    // eslint-disable-next-line no-param-reassign
    entry.isLastSubResult = index === formattedSubResults.length - 1;
  });

  // Return main result first, then sub-results
  return [pageResult, ...formattedSubResults];
}