atproto/packages/api/tests/rich-text-detection.test.ts
Matthieu Sieben b934b396b1
Client SDK rework (#2483)
* feat(api): support creation of oauth based AtpAgents

* oauth: misc fixes for confidential clients

* fix(xrpc): remove ReadableStream.from polyfill

* OAuth docs tweaks (#2679)

* OAuth: clarification about client_name being shown

* OAuth: re-write handle resolution privacy concern

* avoid relying on ReadableStream.from in xrpc-server tests

* feat(oauth-types): expose "ALLOW_UNSECURE_ORIGINS" constant

* feat(handle-resolver): expose "AtprotoIdentityDidMethods" type

* fix(oauth-client): ensure that the oauth metadata document contains client_id_metadata_document_supported

* fix(oauth-types): prevent unknown query string in loopback client id

* fix(identity-resolver): check that handle is in did doc's "alsoKnownAs"

* feat(oauth-client:oauth-resolver): allow logging in using either the PDS URL or Entryway URL

* fix(oauth-client): return better error in case of invalid "oauth-protected-resource" status code

* refactor(did): group atproto specific checks in own

* feat(api): relax typing of "appLabelers" and "labelers" AtpClient properties

* allow any did as labeller (for tests mainly)

* fix(api): allow to override "atproto-proxy" on a per-request basis

* remove release candidate versions from changelog

* update changeset for api and xrpc packages

* Add missing changeset

* revert RC versions

* Proper wording in OAUTH.md api example

* remove "pre" changeset file

* xrpc: restore original behavior of setHeader and unsetHeader

* docs: add comment for XrpcClient's constructor arg

* feat(api): expose "schemas" publicly

* feat(api): allow customizing the whatwg fetch function of the AtpAgent

* docs(api): improve migration docs

* docs: change reference to BskyAgent to AtpAgent

* docs: mention the breaking change regarding setSessionPersistHandler

* fix(api): better split AtpClient concerns

* fix(xrpc): remove unused import

* refactor(api): simplify class hierarchy by removing AtpClient

* fix(api): mock proper method for facets detection

* restore ability to restore session asynchronously

* feat(api): allow instantiating Agent with same argument as super class

* docs(api): properly extend Agent class

* style(xrpc): var name

* docs(api): remove "async" to header getter

---------

Co-authored-by: Devin Ivy <devinivy@gmail.com>
Co-authored-by: bnewbold <bnewbold@robocracy.org>
Co-authored-by: Hailey <me@haileyok.com>
2024-08-12 19:57:21 +02:00

389 lines
11 KiB
TypeScript
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { AtpAgent, RichText, RichTextSegment } from '../src'
import { isTag } from '../src/client/types/app/bsky/richtext/facet'
describe('detectFacets', () => {
const agent = new AtpAgent({ service: 'http://localhost' })
// Replace the agent's `resolveHandle` with a stub: any handle resolves to the
// deterministic fake DID `did:fake:<handle>`, which is what the expected
// outputs for mention segments are written against.
agent.com.atproto.identity.resolveHandle = async (params) => {
  const did = `did:fake:${params?.handle}`
  return { success: true, headers: {}, data: { did } }
}
// Sample texts for mention and link detection. Each entry is paired by index
// with the entry at the same position in `outputs`, so the two arrays must be
// kept in the same order.
const inputs = [
// @-mentions
'no mention',
'@handle.com middle end',
'start @handle.com end',
'start middle @handle.com',
'@handle.com @handle.com @handle.com',
'@full123-chars.test',
'not@right',
'@handle.com!@#$chars',
'@handle.com\n@handle.com',
'parenthetical (@handle.com)',
'👨‍👩‍👧‍👧 @handle.com 👨‍👩‍👧‍👧',
// links written with an explicit https:// scheme
'start https://middle.com end',
'start https://middle.com/foo/bar end',
'start https://middle.com/foo/bar?baz=bux end',
'start https://middle.com/foo/bar?baz=bux#hash end',
'https://start.com/foo/bar?baz=bux#hash middle end',
'start middle https://end.com/foo/bar?baz=bux#hash',
'https://newline1.com\nhttps://newline2.com',
'👨‍👩‍👧‍👧 https://middle.com 👨‍👩‍👧‍👧',
// links written without a scheme
'start middle.com end',
'start middle.com/foo/bar end',
'start middle.com/foo/bar?baz=bux end',
'start middle.com/foo/bar?baz=bux#hash end',
'start.com/foo/bar?baz=bux#hash middle end',
'start middle end.com/foo/bar?baz=bux#hash',
'newline1.com\nnewline2.com',
'a example.com/index.php php link',
'a trailing bsky.app: colon',
// dotted text that is expected to stay plain text (see `outputs`)
'not.. a..url ..here',
'e.g.',
'something-cool.jpg',
'website.com.jpg',
'e.g./foo',
'website.com.jpg/foo',
// trailing slash/whitespace, several links, surrounding punctuation
'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/ ',
'https://foo.com https://bar.com/whatever https://baz.com',
'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
'parenthentical (https://foo.com)',
'except for https://foo.com/thing_(cool)',
]
// Expected segmentation for each entry of `inputs` (same index). Every
// innermost array is one segment in the shape produced by `segmentToOutput`:
// `[text]` for a segment without a facet, or `[text, target]` for a faceted
// segment, where `target` is the mention DID (`did:fake:<handle>` from the
// stubbed resolver) or the link URI.
const outputs: string[][][] = [
// @-mentions
[['no mention']],
[['@handle.com', 'did:fake:handle.com'], [' middle end']],
[['start '], ['@handle.com', 'did:fake:handle.com'], [' end']],
[['start middle '], ['@handle.com', 'did:fake:handle.com']],
[
['@handle.com', 'did:fake:handle.com'],
[' '],
['@handle.com', 'did:fake:handle.com'],
[' '],
['@handle.com', 'did:fake:handle.com'],
],
[['@full123-chars.test', 'did:fake:full123-chars.test']],
[['not@right']],
[['@handle.com', 'did:fake:handle.com'], ['!@#$chars']],
[
['@handle.com', 'did:fake:handle.com'],
['\n'],
['@handle.com', 'did:fake:handle.com'],
],
[['parenthetical ('], ['@handle.com', 'did:fake:handle.com'], [')']],
[['👨‍👩‍👧‍👧 '], ['@handle.com', 'did:fake:handle.com'], [' 👨‍👩‍👧‍👧']],
// links written with an explicit https:// scheme
[['start '], ['https://middle.com', 'https://middle.com'], [' end']],
[
['start '],
['https://middle.com/foo/bar', 'https://middle.com/foo/bar'],
[' end'],
],
[
['start '],
[
'https://middle.com/foo/bar?baz=bux',
'https://middle.com/foo/bar?baz=bux',
],
[' end'],
],
[
['start '],
[
'https://middle.com/foo/bar?baz=bux#hash',
'https://middle.com/foo/bar?baz=bux#hash',
],
[' end'],
],
[
[
'https://start.com/foo/bar?baz=bux#hash',
'https://start.com/foo/bar?baz=bux#hash',
],
[' middle end'],
],
[
['start middle '],
[
'https://end.com/foo/bar?baz=bux#hash',
'https://end.com/foo/bar?baz=bux#hash',
],
],
[
['https://newline1.com', 'https://newline1.com'],
['\n'],
['https://newline2.com', 'https://newline2.com'],
],
[['👨‍👩‍👧‍👧 '], ['https://middle.com', 'https://middle.com'], [' 👨‍👩‍👧‍👧']],
// links written without a scheme: the expected URI gains an https:// prefix
[['start '], ['middle.com', 'https://middle.com'], [' end']],
[
['start '],
['middle.com/foo/bar', 'https://middle.com/foo/bar'],
[' end'],
],
[
['start '],
['middle.com/foo/bar?baz=bux', 'https://middle.com/foo/bar?baz=bux'],
[' end'],
],
[
['start '],
[
'middle.com/foo/bar?baz=bux#hash',
'https://middle.com/foo/bar?baz=bux#hash',
],
[' end'],
],
[
[
'start.com/foo/bar?baz=bux#hash',
'https://start.com/foo/bar?baz=bux#hash',
],
[' middle end'],
],
[
['start middle '],
['end.com/foo/bar?baz=bux#hash', 'https://end.com/foo/bar?baz=bux#hash'],
],
[
['newline1.com', 'https://newline1.com'],
['\n'],
['newline2.com', 'https://newline2.com'],
],
[
['a '],
['example.com/index.php', 'https://example.com/index.php'],
[' php link'],
],
[['a trailing '], ['bsky.app', 'https://bsky.app'], [': colon']],
// dotted text that must remain a single plain-text segment
[['not.. a..url ..here']],
[['e.g.']],
[['something-cool.jpg']],
[['website.com.jpg']],
[['e.g./foo']],
[['website.com.jpg/foo']],
// trailing slash/whitespace, several links, surrounding punctuation
[
['Classic article '],
[
'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
],
],
[
['Classic article '],
[
'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
],
[' '],
],
[
['https://foo.com', 'https://foo.com'],
[' '],
['https://bar.com/whatever', 'https://bar.com/whatever'],
[' '],
['https://baz.com', 'https://baz.com'],
],
[
['punctuation '],
['https://foo.com', 'https://foo.com'],
[', '],
['https://bar.com/whatever', 'https://bar.com/whatever'],
['; '],
['https://baz.com', 'https://baz.com'],
['.'],
],
[['parenthentical ('], ['https://foo.com', 'https://foo.com'], [')']],
[
['except for '],
['https://foo.com/thing_(cool)', 'https://foo.com/thing_(cool)'],
],
]
it('correctly handles a set of text inputs', async () => {
  // `inputs` and `outputs` are paired by index. Fail loudly if the fixtures
  // drift out of sync instead of silently leaving trailing expectations
  // unchecked.
  expect(outputs).toHaveLength(inputs.length)

  // Run sequentially so a failure points at the first mismatching input.
  for (const [i, input] of inputs.entries()) {
    const rt = new RichText({ text: input })
    await rt.detectFacets(agent)
    // Flatten each detected segment into the `[text]` / `[text, target]`
    // shape used by the `outputs` fixture before comparing.
    expect(Array.from(rt.segments(), segmentToOutput)).toEqual(outputs[i])
  }
})
describe('correctly detects tags inline', () => {
const inputs: [
string,
string[],
{ byteStart: number; byteEnd: number }[],
][] = [
['#a', ['a'], [{ byteStart: 0, byteEnd: 2 }]],
[
'#a #b',
['a', 'b'],
[
{ byteStart: 0, byteEnd: 2 },
{ byteStart: 3, byteEnd: 5 },
],
],
['#1', [], []],
['#1a', ['1a'], [{ byteStart: 0, byteEnd: 3 }]],
['#tag', ['tag'], [{ byteStart: 0, byteEnd: 4 }]],
['body #tag', ['tag'], [{ byteStart: 5, byteEnd: 9 }]],
['#tag body', ['tag'], [{ byteStart: 0, byteEnd: 4 }]],
['body #tag body', ['tag'], [{ byteStart: 5, byteEnd: 9 }]],
['body #1', [], []],
['body #1a', ['1a'], [{ byteStart: 5, byteEnd: 8 }]],
['body #a1', ['a1'], [{ byteStart: 5, byteEnd: 8 }]],
['#', [], []],
['#?', [], []],
['text #', [], []],
['text # text', [], []],
[
'body #thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
['thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'],
[{ byteStart: 5, byteEnd: 70 }],
],
[
'body #thisisa65characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab',
[],
[],
],
[
'body #thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!',
['thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'],
[{ byteStart: 5, byteEnd: 70 }],
],
[
'its a #double#rainbow',
['double#rainbow'],
[{ byteStart: 6, byteEnd: 21 }],
],
['##hashash', ['#hashash'], [{ byteStart: 0, byteEnd: 9 }]],
['##', [], []],
['some #n0n3s@n5e!', ['n0n3s@n5e'], [{ byteStart: 5, byteEnd: 15 }]],
[
'works #with,punctuation',
['with,punctuation'],
[{ byteStart: 6, byteEnd: 23 }],
],
[
'strips trailing #punctuation, #like. #this!',
['punctuation', 'like', 'this'],
[
{ byteStart: 16, byteEnd: 28 },
{ byteStart: 30, byteEnd: 35 },
{ byteStart: 37, byteEnd: 42 },
],
],
[
'strips #multi_trailing___...',
['multi_trailing'],
[{ byteStart: 7, byteEnd: 22 }],
],
[
'works with #🦋 emoji, and #butter🦋fly',
['🦋', 'butter🦋fly'],
[
{ byteStart: 11, byteEnd: 16 },
{ byteStart: 28, byteEnd: 42 },
],
],
[
'#same #same #but #diff',
['same', 'same', 'but', 'diff'],
[
{ byteStart: 0, byteEnd: 5 },
{ byteStart: 6, byteEnd: 11 },
{ byteStart: 12, byteEnd: 16 },
{ byteStart: 17, byteEnd: 22 },
],
],
['this #⃣tag should not be a tag', [], []],
[
'this ##⃣tag should be a tag',
['#⃣tag'],
[
{
byteStart: 5,
byteEnd: 16,
},
],
],
[
'this #t\nag should be a tag',
['t'],
[
{
byteStart: 5,
byteEnd: 7,
},
],
],
['no match (\\u200B): #', [], []],
['no match (\\u200Ba): #a', [], []],
['match (a\\u200Bb): #ab', ['a'], [{ byteStart: 18, byteEnd: 20 }]],
['match (ab\\u200B): #ab', ['ab'], [{ byteStart: 18, byteEnd: 21 }]],
['no match (\\u20e2tag): #⃢tag', [], []],
['no match (a\\u20e2b): #a⃢b', ['a'], [{ byteStart: 21, byteEnd: 23 }]],
[
'match full width number sign (tag): tag',
['tag'],
[{ byteStart: 36, byteEnd: 42 }],
],
[
'match full width number sign (tag): #⃣tag',
['#⃣tag'],
[{ byteStart: 36, byteEnd: 49 }],
],
['no match 1?: #1?', [], []],
]
// One test case per fixture row: `input` is the text, `tags` the expected tag
// values in order, `indices` the expected byte ranges of the faceted segments.
it.each(inputs)('%s', async (input, tags, indices) => {
  const richText = new RichText({ text: input })
  await richText.detectFacets(agent)

  const foundTags: string[] = []
  const foundIndices: { byteStart: number; byteEnd: number }[] = []
  const facets = Array.from(richText.segments(), (segment) => segment.facet)
  facets.forEach((facet) => {
    // Segments without a facet contribute nothing.
    if (!facet) return
    const tagValues = facet.features.filter(isTag).map((feature) => feature.tag)
    foundTags.push(...tagValues)
    // The byte range is recorded for every faceted segment, whether or not
    // any of its features is a tag.
    foundIndices.push(facet.index)
  })

  expect(foundTags).toEqual(tags)
  expect(foundIndices).toEqual(indices)
})
})
})
/**
 * Flattens a rich-text segment into the tuple shape used by the `outputs`
 * fixture.
 *
 * Only the facet's first feature is considered.
 *
 * @param segment - segment yielded by `RichText#segments()`
 * @returns `[text]` when the segment has no facet; otherwise `[text, target]`,
 * where `target` is the first feature's `did` if truthy, else its `uri` if
 * truthy, else `''` (for example a tag feature or an empty feature list).
 */
function segmentToOutput(segment: RichTextSegment): string[] {
  const { text, facet } = segment
  if (!facet) {
    return [text]
  }

  // Index access yields `undefined` for an empty feature list, hence `?.`.
  const feature = facet.features[0]
  const target = feature?.did || feature?.uri
  return [text, target ? String(target) : '']
}