Handle apostrophes and other punctuation when muting words (#2344)

Support muted words with apostrophes/punctuation
2024-03-19 15:59:20 -05:00 · 2024-03-19 15:59:20 -05:00 · abc6f82da3
commit abc6f82da3
parent b4346727f7
4 changed files with 100 additions and 32 deletions
--- a/.changeset/big-houses-talk.md
+++ b/.changeset/big-houses-talk.md
@@ -0,0 +1,5 @@
+---
+'@atproto/api': patch
+---
+
+Support muting words that contain apostrophes and other punctuation
--- a/packages/api/src/moderation/mutewords.ts
+++ b/packages/api/src/moderation/mutewords.ts
@@ -82,38 +82,16 @@ export function hasMutedWord({
      if (mutedWord === wordTrimmedPunctuation) return true
      if (mutedWord.length > wordTrimmedPunctuation.length) continue

-      // handle hyphenated, slash separated words, etc
-      if (REGEX.SEPARATORS.test(wordTrimmedPunctuation)) {
-        // check against full normalized phrase
-        const wordNormalizedSeparators = wordTrimmedPunctuation.replace(
-          REGEX.SEPARATORS,
-          ' ',
-        )
-        const mutedWordNormalizedSeparators = mutedWord.replace(
-          REGEX.SEPARATORS,
-          ' ',
-        )
-        // hyphenated (or other sep) to spaced words
-        if (wordNormalizedSeparators === mutedWordNormalizedSeparators)
-          return true
+      if (/\p{P}+/u.test(wordTrimmedPunctuation)) {
+        const spacedWord = wordTrimmedPunctuation.replace(/\p{P}+/gu, ' ')
+        if (spacedWord === mutedWord) return true

-        /* Disabled for now e.g. `super-cool` to `supercool`
-        const wordNormalizedCompressed = wordNormalizedSeparators.replace(
-          REGEX.WORD_BOUNDARY,
-          '',
-        )
-        const mutedWordNormalizedCompressed =
-          mutedWordNormalizedSeparators.replace(/\s+?/g, '')
-        // hyphenated (or other sep) to non-hyphenated contiguous word
-        if (mutedWordNormalizedCompressed === wordNormalizedCompressed)
-          return true
-        */
+        const contiguousWord = spacedWord.replace(/\s/gu, '')
+        if (contiguousWord === mutedWord) return true

-        // then individual parts of separated phrases/words
-        const wordParts = wordTrimmedPunctuation.split(REGEX.SEPARATORS)
-        for (const wp of wordParts) {
-          // still retain internal punctuation
-          if (wp === mutedWord) return true
+        const wordParts = wordTrimmedPunctuation.split(/\p{P}+/u)
+        for (const wordPart of wordParts) {
+          if (wordPart === mutedWord) return true
        }
      }
    }
--- a/packages/api/tests/bsky-agent.test.ts
+++ b/packages/api/tests/bsky-agent.test.ts
@@ -1582,6 +1582,13 @@ describe('agent', () => {
        expect(end.mutedWords.find((m) => m.value === '##️⃣')).toBeFalsy()
      })

+      it(`apostrophe: Bluesky's`, async () => {
+        await agent.upsertMutedWords([{ value: `Bluesky's`, targets: [] }])
+        const { mutedWords } = (await agent.getPreferences()).moderationPrefs
+
+        expect(mutedWords.find((m) => m.value === `Bluesky's`)).toBeTruthy()
+      })
+
      describe(`invalid characters`, () => {
        it('zero width space', async () => {
          const prev = (await agent.getPreferences()).moderationPrefs
--- a/packages/api/tests/moderation-mutewords.test.ts
+++ b/packages/api/tests/moderation-mutewords.test.ts
@@ -89,6 +89,22 @@ describe(`hasMutedWord`, () => {
      expect(match).toBe(true)
    })

+    it(`match: single char with length > 1 ☠︎`, () => {
+      const rt = new RichText({
+        text: `Idk why ☠︎ but maybe`,
+      })
+      rt.detectFacetsWithoutResolution()
+
+      const match = hasMutedWord({
+        mutedWords: [{ value: '☠︎', targets: ['content'] }],
+        text: rt.text,
+        facets: rt.facets,
+        outlineTags: [],
+      })
+
+      expect(match).toBe(true)
+    })
+
    it(`no match: long muted word, short post`, () => {
      const rt = new RichText({
        text: `hey`,
@@ -248,6 +264,57 @@ describe(`hasMutedWord`, () => {
      })
    })

+    describe(`apostrophes: Bluesky's`, () => {
+      const rt = new RichText({
+        text: `Yay, Bluesky's mutewords work`,
+      })
+      rt.detectFacetsWithoutResolution()
+
+      it(`match: Bluesky's`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: `Bluesky's`, targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
+      it(`match: Bluesky`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: 'Bluesky', targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
+      it(`match: bluesky`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: 'bluesky', targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
+      it(`match: blueskys`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: 'blueskys', targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+    })
+
    describe(`Why so S@assy?`, () => {
      const rt = new RichText({
        text: `Why so S@assy?`,
@@ -398,6 +465,17 @@ describe(`hasMutedWord`, () => {
        expect(match).toBe(true)
      })

+      it(`match: bad`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: `bad`, targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
      it(`match: super bad`, () => {
        const match = hasMutedWord({
          mutedWords: [{ value: `super bad`, targets: ['content'] }],
@@ -417,7 +495,7 @@ describe(`hasMutedWord`, () => {
          outlineTags: [],
        })

-        expect(match).toBe(false)
+        expect(match).toBe(true)
      })
    })

@@ -474,7 +552,7 @@ describe(`hasMutedWord`, () => {
          outlineTags: [],
        })

-        expect(match).toBe(false)
+        expect(match).toBe(true)
      })
    })