import { AppBskyActorDefs, AppBskyRichtextFacet } from '../client'

/**
 * Regular expressions shared by the muted-word matcher.
 *
 * The patterns used with `.test()` are deliberately non-global: a regex with
 * the `g` flag keeps `lastIndex` state between `.test()` calls, which would
 * make a shared instance give alternating results.
 */
const REGEX = {
  LEADING_TRAILING_PUNCTUATION: /(?:^\p{P}+|\p{P}+$)/gu,
  ESCAPE: /[[\]{}()*+?.\\^$|\s]/g,
  SEPARATORS: /[/\-–—()[\]_]+/g,
  WORD_BOUNDARY: /[\s\n\t\r\f\v]+?/g,
  SPACE_OR_PUNCTUATION: /(?:\s|\p{P})+?/u,
  PUNCTUATION: /\p{P}+/u,
  PUNCTUATION_GLOBAL: /\p{P}+/gu,
  WHITESPACE_GLOBAL: /\s/gu,
}

/**
 * List of 2-letter lang codes for languages that either don't use spaces, or
 * don't use spaces in a way conducive to word-based filtering.
 *
 * For these, we use a simple `String.includes` to check for a match.
 */
const LANGUAGE_EXCEPTIONS = [
  'ja', // Japanese
  'zh', // Chinese
  'ko', // Korean
  'th', // Thai
  'vi', // Vietnamese
]

/**
 * Gathers the lower-cased tags of a post: the outline tags plus the tag of
 * every facet feature accepted by `AppBskyRichtextFacet.isTag`.
 */
function collectTags(
  outlineTags: string[] | undefined,
  facets: AppBskyRichtextFacet.Main[] | undefined,
): string[] {
  const tags = (outlineTags ?? []).map((tag) => tag.toLowerCase())

  for (const facet of facets ?? []) {
    // Inspect every feature rather than only `features[0]`: a tag is not
    // guaranteed to be the first feature of a facet, and reading `.tag` from a
    // non-tag feature yields `undefined`, which cannot be lower-cased.
    for (const feature of facet.features) {
      if (!AppBskyRichtextFacet.isTag(feature)) continue
      const tag: unknown = feature.tag
      if (typeof tag === 'string') tags.push(tag.toLowerCase())
    }
  }

  return tags
}

/**
 * Checks whether any of the given muted words matches a post.
 *
 * Matching is case-insensitive. Every muted word is compared against the
 * post's tags; muted words whose `targets` include `'content'` are also
 * compared against the post text.
 *
 * @param mutedWords - Muted word entries; `value` and `targets` are read.
 * @param text - The post text.
 * @param facets - Rich text facets; their tag features count as tags.
 * @param outlineTags - Additional tags that are not part of the text.
 * @param languages - Language codes of the post. Only the first entry is
 *   consulted; if it is listed in `LANGUAGE_EXCEPTIONS`, plain substring
 *   matching is used instead of word-based matching.
 * @returns `true` as soon as one muted word matches, otherwise `false`.
 */
export function hasMutedWord({
  mutedWords,
  text,
  facets,
  outlineTags,
  languages,
}: {
  mutedWords: AppBskyActorDefs.MutedWord[]
  text: string
  facets?: AppBskyRichtextFacet.Main[]
  outlineTags?: string[]
  languages?: string[]
}): boolean {
  const exception = LANGUAGE_EXCEPTIONS.includes(languages?.[0] ?? '')
  const tags = collectTags(outlineTags, facets)

  // Both values depend only on `text`, so they are computed once instead of
  // once per muted word. The split is deferred until a muted word needs it.
  const postText = text.toLowerCase()
  let words: string[] | undefined

  for (const mute of mutedWords) {
    const mutedWord = mute.value.toLowerCase()

    // `content` applies to tags as well
    if (tags.includes(mutedWord)) return true
    // rest of the checks are for `content` only
    if (!mute.targets.includes('content')) continue
    // single character or other exception, has to use includes
    if ((mutedWord.length === 1 || exception) && postText.includes(mutedWord))
      return true
    // too long
    if (mutedWord.length > postText.length) continue
    // exact match
    if (mutedWord === postText) return true
    // any muted phrase with space or punctuation
    if (
      REGEX.SPACE_OR_PUNCTUATION.test(mutedWord) &&
      postText.includes(mutedWord)
    )
      return true

    // check individual character groups
    if (words === undefined) words = postText.split(REGEX.WORD_BOUNDARY)

    for (const word of words) {
      if (word === mutedWord) return true

      // compare word without leading/trailing punctuation, but allow internal
      // punctuation (such as `s@ssy`)
      const wordTrimmedPunctuation = word.replace(
        REGEX.LEADING_TRAILING_PUNCTUATION,
        '',
      )

      if (mutedWord === wordTrimmedPunctuation) return true
      if (mutedWord.length > wordTrimmedPunctuation.length) continue

      if (REGEX.PUNCTUATION.test(wordTrimmedPunctuation)) {
        // internal punctuation treated as a space, e.g. `a-b` vs `a b`
        const spacedWord = wordTrimmedPunctuation.replace(
          REGEX.PUNCTUATION_GLOBAL,
          ' ',
        )
        if (spacedWord === mutedWord) return true

        // internal punctuation removed entirely, e.g. `a-b` vs `ab`
        const contiguousWord = spacedWord.replace(REGEX.WHITESPACE_GLOBAL, '')
        if (contiguousWord === mutedWord) return true

        // each punctuation-separated part on its own, e.g. `a-b` vs `a`
        const wordParts = wordTrimmedPunctuation.split(REGEX.PUNCTUATION)
        for (const wordPart of wordParts) {
          if (wordPart === mutedWord) return true
        }
      }
    }
  }

  return false
}