Eric/tag sanitization (#2247)

* Don't remove hash from muted words
* Split out CRUD actions, only sanitize on inserts
* Add changeset
* Handle hash emoji in muted words
* Add sanitization for invalid chars
* Remove console
* Add util
* Clean up changesets
* Format
* Wow forgot to commit change
2024-02-29 13:06:55 -06:00 · 2024-02-29 13:06:55 -06:00 · 2a0ceb8180
commit 2a0ceb8180
parent 1b0b4f93ac
9 changed files with 265 additions and 81 deletions
--- a/.changeset/rotten-actors-dance.md
+++ b/.changeset/rotten-actors-dance.md
@ -0,0 +1,5 @@
+---
+'@atproto/api': patch
+---
+
+Fix double sanitization bug when editing muted words.
--- a/.changeset/silly-carrots-repair.md
+++ b/.changeset/silly-carrots-repair.md
@ -0,0 +1,5 @@
+---
+'@atproto/api': patch
+---
+
+More sanitization of muted words, including newlines and leading/trailing whitespace.
--- a/.changeset/small-dragons-cry.md
+++ b/.changeset/small-dragons-cry.md
@ -0,0 +1,5 @@
+---
+'@atproto/api': patch
+---
+
+Add `sanitizeMutedWordValue` util
--- a/.changeset/sour-gorillas-unite.md
+++ b/.changeset/sour-gorillas-unite.md
@ -0,0 +1,5 @@
+---
+'@atproto/api': patch
+---
+
+Handle hash emoji in muted words
--- a/packages/api/src/bsky-agent.ts
+++ b/packages/api/src/bsky-agent.ts
@ -13,6 +13,7 @@ import {
  BskyThreadViewPreference,
  BskyInterestsPreference,
 } from './types'
+import { sanitizeMutedWordValue } from './util'

 const FEED_VIEW_PREF_DEFAULTS = {
  hideReplies: false,
@ -565,16 +566,108 @@ export class BskyAgent extends AtpAgent {
    })
  }

-  async upsertMutedWords(mutedWords: AppBskyActorDefs.MutedWord[]) {
-    await updateMutedWords(this, mutedWords, 'upsert')
+  /**
+   * Inserts muted words into the user's preferences, merging with any
+   * existing entries.
+   *
+   * Each incoming value is passed through `sanitizeMutedWordValue`. Values
+   * that sanitize to an empty string are skipped. When a stored word has the
+   * same sanitized value, the targets are unioned; otherwise a new entry is
+   * appended with the sanitized value.
+   *
+   * @param newMutedWords words to insert or merge
+   */
+  async upsertMutedWords(newMutedWords: AppBskyActorDefs.MutedWord[]) {
+    await updatePreferences(this, (prefs: AppBskyActorDefs.Preferences) => {
+      const existingPref = prefs.findLast(
+        (pref) =>
+          AppBskyActorDefs.isMutedWordsPref(pref) &&
+          AppBskyActorDefs.validateMutedWordsPref(pref).success,
+      )
+
+      // Start from the stored items when a valid pref exists, otherwise from
+      // an empty list, so that both cases share the same sanitize/skip/merge
+      // path (previously the "create" path stored empty values and
+      // duplicates verbatim).
+      const items: AppBskyActorDefs.MutedWord[] =
+        existingPref && AppBskyActorDefs.isMutedWordsPref(existingPref)
+          ? existingPref.items
+          : []
+
+      for (const newWord of newMutedWords) {
+        const sanitizedValue = sanitizeMutedWordValue(newWord.value)
+
+        // was trimmed down to an empty string e.g. single `#`
+        if (!sanitizedValue) continue
+
+        const match = items.find((item) => item.value === sanitizedValue)
+        if (match) {
+          // merge targets without duplicates
+          match.targets = Array.from(
+            new Set([...match.targets, ...newWord.targets]),
+          )
+        } else {
+          items.push({ ...newWord, value: sanitizedValue })
+        }
+      }
+
+      return prefs
+        .filter((p) => !AppBskyActorDefs.isMutedWordsPref(p))
+        .concat([
+          {
+            ...existingPref,
+            items,
+            $type: 'app.bsky.actor.defs#mutedWordsPref',
+          },
+        ])
+    })
  }

  async updateMutedWord(mutedWord: AppBskyActorDefs.MutedWord) {
-    await updateMutedWords(this, [mutedWord], 'update')
+    // Replaces the targets of the stored muted word whose value is exactly
+    // `mutedWord.value`. The value is compared as-is (no sanitization here;
+    // sanitization only happens on insert), so callers must pass the value
+    // as it is stored. A word that is not stored is left alone.
+    await updatePreferences(this, (prefs: AppBskyActorDefs.Preferences) => {
+      const storedPref = prefs.findLast(
+        (pref) =>
+          AppBskyActorDefs.isMutedWordsPref(pref) &&
+          AppBskyActorDefs.validateMutedWordsPref(pref).success,
+      )
+
+      // No valid muted words pref is stored, so there is nothing to update.
+      // Return the preferences unchanged instead of appending a pref object
+      // that has a `$type` but no `items`.
+      if (!storedPref || !AppBskyActorDefs.isMutedWordsPref(storedPref)) {
+        return prefs
+      }
+
+      const target = storedPref.items.find(
+        (item) => item.value === mutedWord.value,
+      )
+      if (target) {
+        target.targets = mutedWord.targets
+      }
+
+      return prefs
+        .filter((p) => !AppBskyActorDefs.isMutedWordsPref(p))
+        .concat([
+          { ...storedPref, $type: 'app.bsky.actor.defs#mutedWordsPref' },
+        ])
+    })
  }

  async removeMutedWord(mutedWord: AppBskyActorDefs.MutedWord) {
-    await updateMutedWords(this, [mutedWord], 'remove')
+    // Removes the first stored muted word whose value is exactly
+    // `mutedWord.value`. The value is compared as-is (no sanitization here;
+    // sanitization only happens on insert), so `#hashtag` does not remove a
+    // word stored as `hashtag`.
+    await updatePreferences(this, (prefs: AppBskyActorDefs.Preferences) => {
+      const storedPref = prefs.findLast(
+        (pref) =>
+          AppBskyActorDefs.isMutedWordsPref(pref) &&
+          AppBskyActorDefs.validateMutedWordsPref(pref).success,
+      )
+
+      // No valid muted words pref is stored, so there is nothing to remove.
+      // Return the preferences unchanged instead of appending a pref object
+      // that has a `$type` but no `items`.
+      if (!storedPref || !AppBskyActorDefs.isMutedWordsPref(storedPref)) {
+        return prefs
+      }
+
+      const index = storedPref.items.findIndex(
+        (item) => item.value === mutedWord.value,
+      )
+      if (index !== -1) {
+        storedPref.items.splice(index, 1)
+      }
+
+      return prefs
+        .filter((p) => !AppBskyActorDefs.isMutedWordsPref(p))
+        .concat([
+          { ...storedPref, $type: 'app.bsky.actor.defs#mutedWordsPref' },
+        ])
+    })
  }

  async hidePost(postUri: string) {
@ -646,76 +739,6 @@ async function updateFeedPreferences(
  return res
 }

-/**
- * A helper specifically for updating muted words preferences
- */
-async function updateMutedWords(
-  agent: BskyAgent,
-  mutedWords: AppBskyActorDefs.MutedWord[],
-  action: 'upsert' | 'update' | 'remove',
-) {
-  const sanitizeMutedWord = (word: AppBskyActorDefs.MutedWord) => ({
-    value: word.value.replace(/^#/, ''),
-    targets: word.targets,
-  })
-
-  await updatePreferences(agent, (prefs: AppBskyActorDefs.Preferences) => {
-    let mutedWordsPref = prefs.findLast(
-      (pref) =>
-        AppBskyActorDefs.isMutedWordsPref(pref) &&
-        AppBskyActorDefs.validateMutedWordsPref(pref).success,
-    )
-
-    if (mutedWordsPref && AppBskyActorDefs.isMutedWordsPref(mutedWordsPref)) {
-      if (action === 'upsert' || action === 'update') {
-        for (const word of mutedWords) {
-          let foundMatch = false
-
-          for (const existingItem of mutedWordsPref.items) {
-            if (existingItem.value === sanitizeMutedWord(word).value) {
-              existingItem.targets =
-                action === 'upsert'
-                  ? Array.from(
-                      new Set([...existingItem.targets, ...word.targets]),
-                    )
-                  : word.targets
-              foundMatch = true
-              break
-            }
-          }
-
-          if (action === 'upsert' && !foundMatch) {
-            mutedWordsPref.items.push(sanitizeMutedWord(word))
-          }
-        }
-      } else if (action === 'remove') {
-        for (const word of mutedWords) {
-          for (let i = 0; i < mutedWordsPref.items.length; i++) {
-            const existing = mutedWordsPref.items[i]
-            if (existing.value === sanitizeMutedWord(word).value) {
-              mutedWordsPref.items.splice(i, 1)
-              break
-            }
-          }
-        }
-      }
-    } else {
-      // if the pref doesn't exist, create it
-      if (action === 'upsert') {
-        mutedWordsPref = {
-          items: mutedWords.map(sanitizeMutedWord),
-        }
-      }
-    }
-
-    return prefs
-      .filter((p) => !AppBskyActorDefs.isMutedWordsPref(p))
-      .concat([
-        { ...mutedWordsPref, $type: 'app.bsky.actor.defs#mutedWordsPref' },
-      ])
-  })
-}
-
 async function updateHiddenPost(
  agent: BskyAgent,
  postUri: string,
--- a/packages/api/src/index.ts
+++ b/packages/api/src/index.ts
@ -8,6 +8,7 @@ export {
 } from '@atproto/lexicon'
 export { parseLanguage } from '@atproto/common-web'
 export * from './types'
+export * from './util'
 export * from './client'
 export * from './agent'
 export * from './rich-text/rich-text'
--- a/packages/api/src/util.ts
+++ b/packages/api/src/util.ts
@ -0,0 +1,6 @@
+/**
+ * Normalizes a muted word value before it is stored.
+ *
+ * Steps, in order:
+ * 1. Remove every CR, LF, soft hyphen, word joiner, zero-width joiner,
+ *    zero-width non-joiner and zero-width space.
+ * 2. Trim leading/trailing whitespace.
+ * 3. Remove a single leading `#`, unless it is followed by U+FE0F (the
+ *    variation selector used by the keycap hash emoji).
+ * 4. Trim again, so that input such as `# tag` does not keep a leading space.
+ *
+ * @param value raw muted word as entered by the user
+ * @returns the sanitized value; may be an empty string (e.g. for `#`)
+ */
+export function sanitizeMutedWordValue(value: string): string {
+  return (
+    value
+      // `g` flag: strip every run of these characters, not only the first.
+      // Done before trimming because `trim()` does not remove zero-width
+      // characters, which would otherwise hide a leading `#` or whitespace.
+      .replace(/[\r\n\u00AD\u2060\u200D\u200C\u200B]+/g, '')
+      .trim()
+      .replace(/^#(?!\ufe0f)/, '')
+      .trim()
+  )
+}
--- a/packages/api/tests/bsky-agent.test.ts
+++ b/packages/api/tests/bsky-agent.test.ts
@ -1202,13 +1202,18 @@ describe('agent', () => {
        await agent.upsertMutedWords([
          { value: 'hashtag', targets: ['content'] },
        ])
+        // is sanitized to `hashtag`
        await agent.upsertMutedWords([{ value: '#hashtag', targets: ['tag'] }])
+
        const { mutedWords } = await agent.getPreferences()
+
        expect(mutedWords.find((m) => m.value === '#hashtag')).toBeFalsy()
+        // merged with existing
        expect(mutedWords.find((m) => m.value === 'hashtag')).toStrictEqual({
          value: 'hashtag',
          targets: ['content', 'tag'],
        })
+        // only one added
        expect(mutedWords.filter((m) => m.value === 'hashtag').length).toBe(1)
      })

@ -1237,15 +1242,21 @@ describe('agent', () => {
        expect(mutedWords.find((m) => m.value === 'no_exist')).toBeFalsy()
      })

-      it('updateMutedWord with #', async () => {
+      it('updateMutedWord with #, does not update', async () => {
+        await agent.upsertMutedWords([
+          {
+            value: '#just_a_tag',
+            targets: ['tag'],
+          },
+        ])
        await agent.updateMutedWord({
-          value: 'hashtag',
+          value: '#just_a_tag',
          targets: ['tag', 'content'],
        })
        const { mutedWords } = await agent.getPreferences()
-        expect(mutedWords.find((m) => m.value === 'hashtag')).toStrictEqual({
-          value: 'hashtag',
-          targets: ['tag', 'content'],
+        expect(mutedWords.find((m) => m.value === 'just_a_tag')).toStrictEqual({
+          value: 'just_a_tag',
+          targets: ['tag'],
        })
      })

@ -1262,11 +1273,124 @@ describe('agent', () => {
        expect(mutedWords.find((m) => m.value === 'tag_then_none')).toBeFalsy()
      })

-      it('removeMutedWord with #', async () => {
+      it('removeMutedWord with #, no match, no removal', async () => {
        await agent.removeMutedWord({ value: '#hashtag', targets: [] })
        const { mutedWords } = await agent.getPreferences()

-        expect(mutedWords.find((m) => m.value === 'hashtag')).toBeFalsy()
+        // was inserted with #hashtag, but we don't sanitize on remove
+        expect(mutedWords.find((m) => m.value === 'hashtag')).toBeTruthy()
+      })
+
+      it('single-hash #', async () => {
+        const prev = await agent.getPreferences()
+        const length = prev.mutedWords.length
+        await agent.upsertMutedWords([{ value: '#', targets: [] }])
+        const end = await agent.getPreferences()
+
+        // sanitized to empty string, not inserted
+        expect(end.mutedWords.length).toEqual(length)
+      })
+
+      it('multi-hash ##', async () => {
+        await agent.upsertMutedWords([{ value: '##', targets: [] }])
+        const { mutedWords } = await agent.getPreferences()
+
+        expect(mutedWords.find((m) => m.value === '#')).toBeTruthy()
+      })
+
+      it('multi-hash ##hashtag', async () => {
+        await agent.upsertMutedWords([{ value: '##hashtag', targets: [] }])
+        const a = await agent.getPreferences()
+
+        expect(a.mutedWords.find((w) => w.value === '#hashtag')).toBeTruthy()
+
+        await agent.removeMutedWord({ value: '#hashtag', targets: [] })
+        const b = await agent.getPreferences()
+
+        expect(b.mutedWords.find((w) => w.value === '#hashtag')).toBeFalsy()
+      })
+
+      it('hash emoji #️⃣', async () => {
+        await agent.upsertMutedWords([{ value: '#️⃣', targets: [] }])
+        const { mutedWords } = await agent.getPreferences()
+
+        expect(mutedWords.find((m) => m.value === '#️⃣')).toBeTruthy()
+
+        await agent.removeMutedWord({ value: '#️⃣', targets: [] })
+        const end = await agent.getPreferences()
+
+        expect(end.mutedWords.find((m) => m.value === '#️⃣')).toBeFalsy()
+      })
+
+      it('hash emoji ##️⃣', async () => {
+        await agent.upsertMutedWords([{ value: '##️⃣', targets: [] }])
+        const { mutedWords } = await agent.getPreferences()
+
+        expect(mutedWords.find((m) => m.value === '#️⃣')).toBeTruthy()
+
+        await agent.removeMutedWord({ value: '#️⃣', targets: [] })
+        const end = await agent.getPreferences()
+
+        expect(end.mutedWords.find((m) => m.value === '#️⃣')).toBeFalsy()
+      })
+
+      it('hash emoji ###️⃣', async () => {
+        await agent.upsertMutedWords([{ value: '###️⃣', targets: [] }])
+        const { mutedWords } = await agent.getPreferences()
+
+        expect(mutedWords.find((m) => m.value === '##️⃣')).toBeTruthy()
+
+        await agent.removeMutedWord({ value: '##️⃣', targets: [] })
+        const end = await agent.getPreferences()
+
+        expect(end.mutedWords.find((m) => m.value === '##️⃣')).toBeFalsy()
+      })
+
+      describe(`invalid characters`, () => {
+        it('zero width space', async () => {
+          const prev = await agent.getPreferences()
+          const length = prev.mutedWords.length
+          await agent.upsertMutedWords([{ value: '#', targets: [] }])
+          const { mutedWords } = await agent.getPreferences()
+
+          expect(mutedWords.length).toEqual(length)
+        })
+
+        it('newline', async () => {
+          await agent.upsertMutedWords([
+            { value: 'test value\n with newline', targets: [] },
+          ])
+          const { mutedWords } = await agent.getPreferences()
+
+          expect(
+            mutedWords.find((m) => m.value === 'test value with newline'),
+          ).toBeTruthy()
+        })
+
+        it('newline(s)', async () => {
+          await agent.upsertMutedWords([
+            { value: 'test value\n\r with newline', targets: [] },
+          ])
+          const { mutedWords } = await agent.getPreferences()
+
+          expect(
+            mutedWords.find((m) => m.value === 'test value with newline'),
+          ).toBeTruthy()
+        })
+
+        it('empty space', async () => {
+          await agent.upsertMutedWords([{ value: ' ', targets: [] }])
+          const { mutedWords } = await agent.getPreferences()
+
+          expect(mutedWords.find((m) => m.value === ' ')).toBeFalsy()
+        })
+
+        it('leading/trailing space', async () => {
+          await agent.upsertMutedWords([{ value: ' trim ', targets: [] }])
+          const { mutedWords } = await agent.getPreferences()
+
+          expect(mutedWords.find((m) => m.value === 'trim')).toBeTruthy()
+        })
      })
    })

--- a/packages/api/tests/rich-text-detection.test.ts
+++ b/packages/api/tests/rich-text-detection.test.ts
@ -309,6 +309,16 @@ describe('detectFacets', () => {
          },
        ],
      ],
+      [
+        'this #t\nag should be a tag',
+        ['t'],
+        [
+          {
+            byteStart: 5,
+            byteEnd: 7,
+          },
+        ],
+      ],
    ]

    for (const [input, tags, indices] of inputs) {