✨ Detect language from record content if lang property is not set (#2301)
* ✨ Detect language from record content if lang property is not set * ✅ Update test snapshots for auto detected language * ✅ Update pds test snapshots * ✅ Adjust test to use the right method * 🧹 Make list override param optional * 🧹 Fix import * 🚨 fix linter issues * ♻️ Use record type for getting text from record * ✅ process records for appview to pick it up * 🧹 Cleanup
This commit is contained in:
parent
d0625be266
commit
6f1f54493e
packages
pnpm-lock.yaml@ -6,6 +6,7 @@ import AtpAgent, {
|
||||
AppBskyRichtextFacet,
|
||||
AppBskyFeedLike,
|
||||
AppBskyGraphFollow,
|
||||
AppBskyGraphList,
|
||||
} from '@atproto/api'
|
||||
import { AtUri } from '@atproto/syntax'
|
||||
import { BlobRef } from '@atproto/lexicon'
|
||||
@ -399,7 +400,12 @@ export class SeedClient<
|
||||
return repost
|
||||
}
|
||||
|
||||
async createList(by: string, name: string, purpose: 'mod' | 'curate') {
|
||||
async createList(
|
||||
by: string,
|
||||
name: string,
|
||||
purpose: 'mod' | 'curate',
|
||||
overrides?: Partial<AppBskyGraphList.Record>,
|
||||
) {
|
||||
const res = await this.agent.api.app.bsky.graph.list.create(
|
||||
{ repo: by },
|
||||
{
|
||||
@ -409,6 +415,7 @@ export class SeedClient<
|
||||
? 'app.bsky.graph.defs#modlist'
|
||||
: 'app.bsky.graph.defs#curatelist',
|
||||
createdAt: new Date().toISOString(),
|
||||
...(overrides || {}),
|
||||
},
|
||||
this.getHeaders(by),
|
||||
)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/** @type {import('jest').Config} */
|
||||
module.exports = {
|
||||
displayName: 'Bsky App View',
|
||||
displayName: 'Ozone',
|
||||
transform: { '^.+\\.(t|j)s$': '@swc/jest' },
|
||||
transformIgnorePatterns: [`<rootDir>/node_modules/(?!get-port)`],
|
||||
testTimeout: 60000,
|
||||
|
@ -41,6 +41,7 @@
|
||||
"express": "^4.17.2",
|
||||
"http-terminator": "^3.2.0",
|
||||
"kysely": "^0.22.0",
|
||||
"lande": "^1.0.10",
|
||||
"multiformats": "^9.9.0",
|
||||
"p-queue": "^6.6.2",
|
||||
"pg": "^8.10.0",
|
||||
|
561
packages/ozone/src/mod-service/lang-data.ts
Normal file
561
packages/ozone/src/mod-service/lang-data.ts
Normal file
@ -0,0 +1,561 @@
|
||||
// Also used in the client app https://github.com/bluesky-social/social-app/blob/main/src/locale/languages.ts
|
||||
interface Language {
|
||||
code3: string
|
||||
code2: string
|
||||
name: string
|
||||
}
|
||||
|
||||
export const LANGUAGES: Language[] = [
|
||||
{ code3: 'aar', code2: 'aa', name: 'Afar' },
|
||||
{ code3: 'abk', code2: 'ab', name: 'Abkhazian' },
|
||||
{ code3: 'ace', code2: '', name: 'Achinese' },
|
||||
{ code3: 'ach', code2: '', name: 'Acoli' },
|
||||
{ code3: 'ada', code2: '', name: 'Adangme' },
|
||||
{ code3: 'ady', code2: '', name: 'Adyghe; Adygei' },
|
||||
{ code3: 'afa', code2: '', name: 'Afro-Asiatic languages' },
|
||||
{ code3: 'afh', code2: '', name: 'Afrihili' },
|
||||
{ code3: 'afr', code2: 'af', name: 'Afrikaans' },
|
||||
{ code3: 'ain', code2: '', name: 'Ainu' },
|
||||
{ code3: 'aka', code2: 'ak', name: 'Akan' },
|
||||
{ code3: 'akk', code2: '', name: 'Akkadian' },
|
||||
{ code3: 'alb', code2: 'sq', name: 'Albanian' },
|
||||
{ code3: 'ale', code2: '', name: 'Aleut' },
|
||||
{ code3: 'alg', code2: '', name: 'Algonquian languages' },
|
||||
{ code3: 'alt', code2: '', name: 'Southern Altai' },
|
||||
{ code3: 'amh', code2: 'am', name: 'Amharic' },
|
||||
{ code3: 'ang', code2: '', name: 'English, Old (ca.450-1100)' },
|
||||
{ code3: 'anp ', code2: 'Angika', name: 'Angika' },
|
||||
{ code3: 'apa', code2: '', name: 'Apache languages' },
|
||||
{ code3: 'ara', code2: 'ar', name: 'Arabic' },
|
||||
{
|
||||
code3: 'arc',
|
||||
code2: '',
|
||||
name: 'Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)',
|
||||
},
|
||||
{ code3: 'arg', code2: 'an', name: 'Aragonese' },
|
||||
{ code3: 'arm', code2: 'hy', name: 'Armenian' },
|
||||
{ code3: 'arn', code2: '', name: 'Mapudungun; Mapuche' },
|
||||
{ code3: 'arp', code2: '', name: 'Arapaho' },
|
||||
{ code3: 'art', code2: '', name: 'Artificial languages' },
|
||||
{ code3: 'arw', code2: '', name: 'Arawak' },
|
||||
{ code3: 'asm', code2: 'as', name: 'Assamese' },
|
||||
{ code3: 'ast', code2: '', name: 'Asturian; Bable; Leonese; Asturleonese' },
|
||||
{ code3: 'ath', code2: '', name: 'Athapascan languages' },
|
||||
{ code3: 'aus', code2: '', name: 'Australian languages' },
|
||||
{ code3: 'ava', code2: 'av', name: 'Avaric' },
|
||||
{ code3: 'ave', code2: 'ae', name: 'Avestan' },
|
||||
{ code3: 'awa', code2: '', name: 'Awadhi' },
|
||||
{ code3: 'aym', code2: 'ay', name: 'Aymara' },
|
||||
{ code3: 'aze', code2: 'az', name: 'Azerbaijani' },
|
||||
{ code3: 'bad', code2: '', name: 'Banda languages' },
|
||||
{ code3: 'bai', code2: '', name: 'Bamileke languages' },
|
||||
{ code3: 'bak', code2: 'ba', name: 'Bashkir' },
|
||||
{ code3: 'bal', code2: '', name: 'Baluchi' },
|
||||
{ code3: 'bam', code2: 'bm', name: 'Bambara' },
|
||||
{ code3: 'ban', code2: '', name: 'Balinese' },
|
||||
{ code3: 'baq', code2: 'eu', name: 'Basque' },
|
||||
{ code3: 'bas', code2: '', name: 'Basa' },
|
||||
{ code3: 'bat', code2: '', name: 'Baltic languages' },
|
||||
{ code3: 'bej', code2: '', name: 'Beja; Bedawiyet' },
|
||||
{ code3: 'bel', code2: 'be', name: 'Belarusian' },
|
||||
{ code3: 'bem', code2: '', name: 'Bemba' },
|
||||
{ code3: 'ben', code2: 'bn', name: 'Bengali' },
|
||||
{ code3: 'ber', code2: '', name: 'Berber languages' },
|
||||
{ code3: 'bho', code2: '', name: 'Bhojpuri' },
|
||||
{ code3: 'bih', code2: 'bh', name: 'Bihari languages' },
|
||||
{ code3: 'bik', code2: '', name: 'Bikol' },
|
||||
{ code3: 'bin', code2: '', name: 'Bini; Edo' },
|
||||
{ code3: 'bis', code2: 'bi', name: 'Bislama' },
|
||||
{ code3: 'bla', code2: '', name: 'Siksika' },
|
||||
{ code3: 'bnt', code2: '', name: 'Bantu languages' },
|
||||
{ code3: 'bod', code2: 'bo', name: 'Tibetan' },
|
||||
{ code3: 'bos', code2: 'bs', name: 'Bosnian' },
|
||||
{ code3: 'bra', code2: '', name: 'Braj' },
|
||||
{ code3: 'bre', code2: 'br', name: 'Breton' },
|
||||
{ code3: 'btk', code2: '', name: 'Batak languages' },
|
||||
{ code3: 'bua', code2: '', name: 'Buriat' },
|
||||
{ code3: 'bug', code2: '', name: 'Buginese' },
|
||||
{ code3: 'bul', code2: 'bg', name: 'Bulgarian' },
|
||||
{ code3: 'bur', code2: 'my', name: 'Burmese' },
|
||||
{ code3: 'byn', code2: '', name: 'Blin; Bilin' },
|
||||
{ code3: 'cad', code2: '', name: 'Caddo' },
|
||||
{ code3: 'cai', code2: '', name: 'Central American Indian languages' },
|
||||
{ code3: 'car', code2: '', name: 'Galibi Carib' },
|
||||
{ code3: 'cat', code2: 'ca', name: 'Catalan; Valencian' },
|
||||
{ code3: 'cau', code2: '', name: 'Caucasian languages' },
|
||||
{ code3: 'ceb', code2: '', name: 'Cebuano' },
|
||||
{ code3: 'cel', code2: '', name: 'Celtic languages' },
|
||||
{ code3: 'ces', code2: 'cs', name: 'Czech' },
|
||||
{ code3: 'cha', code2: 'ch', name: 'Chamorro' },
|
||||
{ code3: 'chb', code2: '', name: 'Chibcha' },
|
||||
{ code3: 'che', code2: 'ce', name: 'Chechen' },
|
||||
{ code3: 'chg', code2: '', name: 'Chagatai' },
|
||||
{ code3: 'chi', code2: 'zh', name: 'Chinese' },
|
||||
{ code3: 'chk', code2: '', name: 'Chuukese' },
|
||||
{ code3: 'chm', code2: '', name: 'Mari' },
|
||||
{ code3: 'chn', code2: '', name: 'Chinook jargon' },
|
||||
{ code3: 'cho', code2: '', name: 'Choctaw' },
|
||||
{ code3: 'chp', code2: '', name: 'Chipewyan; Dene Suline' },
|
||||
{ code3: 'chr', code2: '', name: 'Cherokee' },
|
||||
{
|
||||
code3: 'chu',
|
||||
code2: 'cu',
|
||||
name: 'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic',
|
||||
},
|
||||
{ code3: 'chv', code2: 'cv', name: 'Chuvash' },
|
||||
{ code3: 'chy', code2: '', name: 'Cheyenne' },
|
||||
{ code3: 'cmc', code2: '', name: 'Chamic languages' },
|
||||
{ code3: 'cnr', code2: '', name: 'Montenegrin' },
|
||||
{ code3: 'cop', code2: '', name: 'Coptic' },
|
||||
{ code3: 'cor', code2: 'kw', name: 'Cornish' },
|
||||
{ code3: 'cos', code2: 'co', name: 'Corsican' },
|
||||
{ code3: 'cpe', code2: '', name: 'Creoles and pidgins, English based' },
|
||||
{ code3: 'cpf', code2: '', name: 'Creoles and pidgins, French-based' },
|
||||
{ code3: 'cpp', code2: '', name: 'Creoles and pidgins, Portuguese-based' },
|
||||
{ code3: 'cre', code2: 'cr', name: 'Cree' },
|
||||
{ code3: 'crh', code2: '', name: 'Crimean Tatar; Crimean Turkish' },
|
||||
{ code3: 'crp', code2: '', name: 'Creoles and pidgins' },
|
||||
{ code3: 'csb', code2: '', name: 'Kashubian' },
|
||||
{ code3: 'cus', code2: '', name: 'Cushitic languages' },
|
||||
{ code3: 'cym', code2: 'cy', name: 'Welsh' },
|
||||
{ code3: 'cze', code2: 'cs', name: 'Czech' },
|
||||
{ code3: 'dak', code2: '', name: 'Dakota' },
|
||||
{ code3: 'dan', code2: 'da', name: 'Danish' },
|
||||
{ code3: 'dar', code2: '', name: 'Dargwa' },
|
||||
{ code3: 'day', code2: '', name: 'Land Dayak languages' },
|
||||
{ code3: 'del', code2: '', name: 'Delaware' },
|
||||
{ code3: 'den', code2: '', name: 'Slave (Athapascan)' },
|
||||
{ code3: 'deu', code2: 'de', name: 'German' },
|
||||
{ code3: 'dgr', code2: '', name: 'Dogrib' },
|
||||
{ code3: 'din', code2: '', name: 'Dinka' },
|
||||
{ code3: 'div', code2: 'dv', name: 'Divehi; Dhivehi; Maldivian' },
|
||||
{ code3: 'doi', code2: '', name: 'Dogri' },
|
||||
{ code3: 'dra', code2: '', name: 'Dravidian languages' },
|
||||
{ code3: 'dsb', code2: '', name: 'Lower Sorbian' },
|
||||
{ code3: 'dua', code2: '', name: 'Duala' },
|
||||
{ code3: 'dum', code2: '', name: 'Dutch, Middle (ca.1050-1350)' },
|
||||
{ code3: 'dut', code2: 'nl', name: 'Dutch; Flemish' },
|
||||
{ code3: 'dyu', code2: '', name: 'Dyula' },
|
||||
{ code3: 'dzo', code2: 'dz', name: 'Dzongkha' },
|
||||
{ code3: 'efi', code2: '', name: 'Efik' },
|
||||
{ code3: 'egy', code2: '', name: 'Egyptian (Ancient)' },
|
||||
{ code3: 'eka', code2: '', name: 'Ekajuk' },
|
||||
{ code3: 'ell', code2: 'el', name: 'Greek, Modern (1453-)' },
|
||||
{ code3: 'elx', code2: '', name: 'Elamite' },
|
||||
{ code3: 'eng', code2: 'en', name: 'English' },
|
||||
{ code3: 'enm', code2: '', name: 'English, Middle (1100-1500)' },
|
||||
{ code3: 'epo', code2: 'eo', name: 'Esperanto' },
|
||||
{ code3: 'est', code2: 'et', name: 'Estonian' },
|
||||
{ code3: 'eus', code2: 'eu', name: 'Basque' },
|
||||
{ code3: 'ewe', code2: 'ee', name: 'Ewe' },
|
||||
{ code3: 'ewo', code2: '', name: 'Ewondo' },
|
||||
{ code3: 'fan', code2: '', name: 'Fang' },
|
||||
{ code3: 'fao', code2: 'fo', name: 'Faroese' },
|
||||
{ code3: 'fas', code2: 'fa', name: 'Persian' },
|
||||
{ code3: 'fat', code2: '', name: 'Fanti' },
|
||||
{ code3: 'fij', code2: 'fj', name: 'Fijian' },
|
||||
{ code3: 'fil', code2: '', name: 'Filipino; Pilipino' },
|
||||
{ code3: 'fin', code2: 'fi', name: 'Finnish' },
|
||||
{ code3: 'fiu', code2: '', name: 'Finno-Ugrian languages' },
|
||||
{ code3: 'fon', code2: '', name: 'Fon' },
|
||||
{ code3: 'fra', code2: 'fr', name: 'French' },
|
||||
{ code3: 'fre', code2: 'fr', name: 'French' },
|
||||
{ code3: 'frm', code2: '', name: 'French, Middle (ca.1400-1600)' },
|
||||
{ code3: 'fro', code2: '', name: 'French, Old (842-ca.1400)' },
|
||||
{ code3: 'frr', code2: '', name: 'Northern Frisian' },
|
||||
{ code3: 'frs', code2: '', name: 'Eastern Frisian' },
|
||||
{ code3: 'fry', code2: 'fy', name: 'Western Frisian' },
|
||||
{ code3: 'ful', code2: 'ff', name: 'Fulah' },
|
||||
{ code3: 'fur', code2: '', name: 'Friulian' },
|
||||
{ code3: 'gaa', code2: '', name: 'Ga' },
|
||||
{ code3: 'gay', code2: '', name: 'Gayo' },
|
||||
{ code3: 'gba', code2: '', name: 'Gbaya' },
|
||||
{ code3: 'gem', code2: '', name: 'Germanic languages' },
|
||||
{ code3: 'geo', code2: 'ka', name: 'Georgian' },
|
||||
{ code3: 'ger', code2: 'de', name: 'German' },
|
||||
{ code3: 'gez', code2: '', name: 'Geez' },
|
||||
{ code3: 'gil', code2: '', name: 'Gilbertese' },
|
||||
{ code3: 'gla', code2: 'gd', name: 'Gaelic; Scottish Gaelic' },
|
||||
{ code3: 'gle', code2: 'ga', name: 'Irish' },
|
||||
{ code3: 'glg', code2: 'gl', name: 'Galician' },
|
||||
{ code3: 'glv', code2: 'gv', name: 'Manx' },
|
||||
{ code3: 'gmh', code2: '', name: 'German, Middle High (ca.1050-1500)' },
|
||||
{ code3: 'goh', code2: '', name: 'German, Old High (ca.750-1050)' },
|
||||
{ code3: 'gon', code2: '', name: 'Gondi' },
|
||||
{ code3: 'gor', code2: '', name: 'Gorontalo' },
|
||||
{ code3: 'got', code2: '', name: 'Gothic' },
|
||||
{ code3: 'grb', code2: '', name: 'Grebo' },
|
||||
{ code3: 'grc', code2: '', name: 'Greek, Ancient (to 1453)' },
|
||||
{ code3: 'gre', code2: 'el', name: 'Greek, Modern (1453-)' },
|
||||
{ code3: 'grn', code2: 'gn', name: 'Guarani' },
|
||||
{ code3: 'gsw', code2: '', name: 'Swiss German; Alemannic; Alsatian' },
|
||||
{ code3: 'gujgu', code2: 'Gujarati', name: 'goudjrati' },
|
||||
{ code3: 'gwi', code2: '', name: "Gwich'in" },
|
||||
{ code3: 'hai', code2: '', name: 'Haida' },
|
||||
{ code3: 'hat', code2: 'ht', name: 'Haitian; Haitian Creole' },
|
||||
{ code3: 'hau', code2: 'ha', name: 'Hausa' },
|
||||
{ code3: 'haw', code2: '', name: 'Hawaiian' },
|
||||
{ code3: 'heb', code2: 'he', name: 'Hebrew' },
|
||||
{ code3: 'her', code2: 'hz', name: 'Herero' },
|
||||
{ code3: 'hil', code2: '', name: 'Hiligaynon' },
|
||||
{
|
||||
code3: 'him',
|
||||
code2: '',
|
||||
name: 'Himachali languages; Western Pahari languages',
|
||||
},
|
||||
{ code3: 'hin', code2: 'hi', name: 'Hindi' },
|
||||
{ code3: 'hit', code2: '', name: 'Hittite' },
|
||||
{ code3: 'hmn', code2: '', name: 'Hmong; Mong' },
|
||||
{ code3: 'hmo', code2: 'ho', name: 'Hiri Motu' },
|
||||
{ code3: 'hrv', code2: 'hr', name: 'Croatian' },
|
||||
{ code3: 'hsb', code2: '', name: 'Upper Sorbian' },
|
||||
{ code3: 'hun', code2: 'hu', name: 'Hungarian' },
|
||||
{ code3: 'hup', code2: '', name: 'Hupa' },
|
||||
{ code3: 'hye', code2: 'hy', name: 'Armenian' },
|
||||
{ code3: 'iba', code2: '', name: 'Iban' },
|
||||
{ code3: 'ibo', code2: 'ig', name: 'Igbo' },
|
||||
{ code3: 'ice', code2: 'is', name: 'Icelandic' },
|
||||
{ code3: 'ido', code2: 'io', name: 'Ido' },
|
||||
{ code3: 'iii', code2: 'ii', name: 'Sichuan Yi; Nuosu' },
|
||||
{ code3: 'ijo', code2: '', name: 'Ijo languages' },
|
||||
{ code3: 'iku', code2: 'iu', name: 'Inuktitut' },
|
||||
{ code3: 'ile', code2: 'ie', name: 'Interlingue; Occidental' },
|
||||
{ code3: 'ilo', code2: '', name: 'Iloko' },
|
||||
{
|
||||
code3: 'ina',
|
||||
code2: 'ia',
|
||||
name: 'Interlingua (International Auxiliary Language Association)',
|
||||
},
|
||||
{ code3: 'inc', code2: '', name: 'Indic languages' },
|
||||
{ code3: 'ind', code2: 'id', name: 'Indonesian' },
|
||||
{ code3: 'ine', code2: '', name: 'Indo-European languages' },
|
||||
{ code3: 'inh', code2: '', name: 'Ingush' },
|
||||
{ code3: 'ipk', code2: 'ik', name: 'Inupiaq' },
|
||||
{ code3: 'ira', code2: '', name: 'Iranian languages' },
|
||||
{ code3: 'iro', code2: '', name: 'Iroquoian languages' },
|
||||
{ code3: 'isl', code2: 'is', name: 'Icelandic' },
|
||||
{ code3: 'ita', code2: 'it', name: 'Italian' },
|
||||
{ code3: 'jav', code2: 'jv', name: 'Javanese' },
|
||||
{ code3: 'jbo', code2: '', name: 'Lojban' },
|
||||
{ code3: 'jpn', code2: 'ja', name: 'Japanese' },
|
||||
{ code3: 'jpr', code2: '', name: 'Judeo-Persian' },
|
||||
{ code3: 'jrb', code2: '', name: 'Judeo-Arabic' },
|
||||
{ code3: 'kaa', code2: '', name: 'Kara-Kalpak' },
|
||||
{ code3: 'kab', code2: '', name: 'Kabyle' },
|
||||
{ code3: 'kac', code2: '', name: 'Kachin; Jingpho' },
|
||||
{ code3: 'kal', code2: 'kl', name: 'Kalaallisut; Greenlandic' },
|
||||
{ code3: 'kam', code2: '', name: 'Kamba' },
|
||||
{ code3: 'kan', code2: 'kn', name: 'Kannada' },
|
||||
{ code3: 'kar', code2: '', name: 'Karen languages' },
|
||||
{ code3: 'kas', code2: 'ks', name: 'Kashmiri' },
|
||||
{ code3: 'kat', code2: 'ka', name: 'Georgian' },
|
||||
{ code3: 'kau', code2: 'kr', name: 'Kanuri' },
|
||||
{ code3: 'kaw', code2: '', name: 'Kawi' },
|
||||
{ code3: 'kaz', code2: 'kk', name: 'Kazakh' },
|
||||
{ code3: 'kbd', code2: '', name: 'Kabardian' },
|
||||
{ code3: 'kha', code2: '', name: 'Khasi' },
|
||||
{ code3: 'khi', code2: '', name: 'Khoisan languages' },
|
||||
{ code3: 'khm', code2: 'km', name: 'Central Khmer' },
|
||||
{ code3: 'kho', code2: '', name: 'Khotanese; Sakan' },
|
||||
{ code3: 'kik', code2: 'ki', name: 'Kikuyu; Gikuyu' },
|
||||
{ code3: 'kin', code2: 'rw', name: 'Kinyarwanda' },
|
||||
{ code3: 'kir', code2: 'ky', name: 'Kirghiz; Kyrgyz' },
|
||||
{ code3: 'kmb', code2: '', name: 'Kimbundu' },
|
||||
{ code3: 'kok', code2: '', name: 'Konkani' },
|
||||
{ code3: 'kom', code2: 'kv', name: 'Komi' },
|
||||
{ code3: 'kon', code2: 'kg', name: 'Kongo' },
|
||||
{ code3: 'kor', code2: 'ko', name: 'Korean' },
|
||||
{ code3: 'kos', code2: '', name: 'Kosraean' },
|
||||
{ code3: 'kpe', code2: '', name: 'Kpelle' },
|
||||
{ code3: 'krc', code2: '', name: 'Karachay-Balkar' },
|
||||
{ code3: 'krl', code2: '', name: 'Karelian' },
|
||||
{ code3: 'kro', code2: '', name: 'Kru languages' },
|
||||
{ code3: 'kru', code2: '', name: 'Kurukh' },
|
||||
{ code3: 'kua', code2: 'kj', name: 'Kuanyama; Kwanyama' },
|
||||
{ code3: 'kum', code2: '', name: 'Kumyk' },
|
||||
{ code3: 'kur', code2: 'ku', name: 'Kurdish' },
|
||||
{ code3: 'kut', code2: '', name: 'Kutenai' },
|
||||
{ code3: 'lad', code2: '', name: 'Ladino' },
|
||||
{ code3: 'lah', code2: '', name: 'Lahnda' },
|
||||
{ code3: 'lam', code2: '', name: 'Lamba' },
|
||||
{ code3: 'lao', code2: 'lo', name: 'Lao' },
|
||||
{ code3: 'lat', code2: 'la', name: 'Latin' },
|
||||
{ code3: 'lav', code2: 'lv', name: 'Latvian' },
|
||||
{ code3: 'lez', code2: '', name: 'Lezghian' },
|
||||
{ code3: 'lim', code2: 'li', name: 'Limburgan; Limburger; Limburgish' },
|
||||
{ code3: 'lin', code2: 'ln', name: 'Lingala' },
|
||||
{ code3: 'lit', code2: 'lt', name: 'Lithuanian' },
|
||||
{ code3: 'lol', code2: '', name: 'Mongo' },
|
||||
{ code3: 'loz', code2: '', name: 'Lozi' },
|
||||
{ code3: 'ltz', code2: 'lb', name: 'Luxembourgish; Letzeburgesch' },
|
||||
{ code3: 'lua', code2: '', name: 'Luba-Lulua' },
|
||||
{ code3: 'lub', code2: 'lu', name: 'Luba-Katanga' },
|
||||
{ code3: 'lug', code2: 'lg', name: 'Ganda' },
|
||||
{ code3: 'lui', code2: '', name: 'Luiseno' },
|
||||
{ code3: 'lun', code2: '', name: 'Lunda' },
|
||||
{
|
||||
code3: 'luo',
|
||||
code2: ' Luo (Kenya and Tanzania)',
|
||||
name: 'luo (Kenya et Tanzanie)',
|
||||
},
|
||||
{ code3: 'lus', code2: '', name: 'Lushai' },
|
||||
{ code3: 'mac', code2: 'mk', name: 'Macedonian' },
|
||||
{ code3: 'mad', code2: '', name: 'Madurese' },
|
||||
{ code3: 'mag', code2: '', name: 'Magahi' },
|
||||
{ code3: 'mah', code2: 'mh', name: 'Marshallese' },
|
||||
{ code3: 'mai', code2: '', name: 'Maithili' },
|
||||
{ code3: 'mak', code2: '', name: 'Makasar' },
|
||||
{ code3: 'mal', code2: 'ml', name: 'Malayalam' },
|
||||
{ code3: 'man', code2: '', name: 'Mandingo' },
|
||||
{ code3: 'mao', code2: 'mi', name: 'Maori' },
|
||||
{ code3: 'map', code2: '', name: 'Austronesian languages' },
|
||||
{ code3: 'mar', code2: 'mr', name: 'Marathi' },
|
||||
{ code3: 'mas', code2: '', name: 'Masai' },
|
||||
{ code3: 'may', code2: 'ms', name: 'Malay' },
|
||||
{ code3: 'mdf', code2: '', name: 'Moksha' },
|
||||
{ code3: 'mdr', code2: '', name: 'Mandar' },
|
||||
{ code3: 'men', code2: '', name: 'Mende' },
|
||||
{ code3: 'mga', code2: '', name: 'Irish, Middle (900-1200)' },
|
||||
{ code3: 'mic', code2: '', name: "Mi'kmaq; Micmac" },
|
||||
{ code3: 'min', code2: '', name: 'Minangkabau' },
|
||||
{ code3: 'mis', code2: '', name: 'Uncoded languages' },
|
||||
{ code3: 'mkd', code2: 'mk', name: 'Macedonian' },
|
||||
{ code3: 'mkh', code2: '', name: 'Mon-Khmer languages' },
|
||||
{ code3: 'mlg', code2: 'mg', name: 'Malagasy' },
|
||||
{ code3: 'mlt', code2: 'mt', name: 'Maltese' },
|
||||
{ code3: 'mnc', code2: '', name: 'Manchu' },
|
||||
{ code3: 'mni', code2: '', name: 'Manipuri' },
|
||||
{ code3: 'mno', code2: '', name: 'Manobo languages' },
|
||||
{ code3: 'moh', code2: '', name: 'Mohawk' },
|
||||
{ code3: 'mon', code2: 'mn', name: 'Mongolian' },
|
||||
{ code3: 'mos', code2: '', name: 'Mossi' },
|
||||
{ code3: 'mri', code2: 'mi', name: 'Maori' },
|
||||
{ code3: 'msa', code2: 'ms', name: 'Malay' },
|
||||
{ code3: 'mul', code2: '', name: 'Multiple languages' },
|
||||
{ code3: 'mun', code2: '', name: 'Munda languages' },
|
||||
{ code3: 'mus', code2: '', name: 'Creek' },
|
||||
{ code3: 'mwl', code2: '', name: 'Mirandese' },
|
||||
{ code3: 'mwr', code2: '', name: 'Marwari' },
|
||||
{ code3: 'mya', code2: 'my', name: 'Burmese' },
|
||||
{ code3: 'myn', code2: '', name: 'Mayan languages' },
|
||||
{ code3: 'myv', code2: '', name: 'Erzya' },
|
||||
{ code3: 'nah', code2: '', name: 'Nahuatl languages' },
|
||||
{ code3: 'nai', code2: '', name: 'North American Indian languages' },
|
||||
{ code3: 'nap', code2: '', name: 'Neapolitan' },
|
||||
{ code3: 'nau', code2: 'na', name: 'Nauru' },
|
||||
{ code3: 'nav', code2: 'nv', name: 'Navajo; Navaho' },
|
||||
{ code3: 'nbl', code2: 'nr', name: 'Ndebele, South; South Ndebele' },
|
||||
{ code3: 'nde', code2: 'nd', name: 'Ndebele, North; North Ndebele' },
|
||||
{ code3: 'ndo', code2: 'ng', name: 'Ndonga' },
|
||||
{
|
||||
code3: 'nds',
|
||||
code2: '',
|
||||
name: 'Low German; Low Saxon; German, Low; Saxon, Low',
|
||||
},
|
||||
{ code3: 'nep', code2: 'ne', name: 'Nepali' },
|
||||
{ code3: 'new', code2: '', name: 'Nepal Bhasa; Newari' },
|
||||
{ code3: 'nia', code2: '', name: 'Nias' },
|
||||
{ code3: 'nic', code2: '', name: 'Niger-Kordofanian languages' },
|
||||
{ code3: 'niu', code2: '', name: 'Niuean' },
|
||||
{ code3: 'nld', code2: 'nl', name: 'Dutch; Flemish' },
|
||||
{ code3: 'nno', code2: 'nn', name: 'Norwegian Nynorsk; Nynorsk, Norwegian' },
|
||||
{ code3: 'nob', code2: 'nb', name: 'Bokmål, Norwegian; Norwegian Bokmål' },
|
||||
{ code3: 'nog', code2: '', name: 'Nogai' },
|
||||
{ code3: 'non', code2: '', name: 'Norse, Old' },
|
||||
{ code3: 'nor', code2: 'no', name: 'Norwegian' },
|
||||
{ code3: 'nqo', code2: '', name: "N'Ko" },
|
||||
{ code3: 'nso', code2: '', name: 'Pedi; Sepedi; Northern Sotho' },
|
||||
{ code3: 'nub', code2: '', name: 'Nubian languages' },
|
||||
{
|
||||
code3: 'nwc',
|
||||
code2: '',
|
||||
name: 'Classical Newari; Old Newari; Classical Nepal Bhasa',
|
||||
},
|
||||
{ code3: 'nya', code2: 'ny', name: 'Chichewa; Chewa; Nyanja' },
|
||||
{ code3: 'nym', code2: '', name: 'Nyamwezi' },
|
||||
{ code3: 'nyn', code2: '', name: 'Nyankole' },
|
||||
{ code3: 'nyo', code2: '', name: 'Nyoro' },
|
||||
{ code3: 'nzi', code2: '', name: 'Nzima' },
|
||||
{ code3: 'oci', code2: 'oc', name: 'Occitan (post 1500)' },
|
||||
{ code3: 'oji', code2: 'oj', name: 'Ojibwa' },
|
||||
{ code3: 'ori', code2: 'or', name: 'Oriya' },
|
||||
{ code3: 'orm', code2: 'om', name: 'Oromo' },
|
||||
{ code3: 'osa', code2: '', name: 'Osage' },
|
||||
{ code3: 'oss', code2: 'os', name: 'Ossetian; Ossetic' },
|
||||
{ code3: 'ota', code2: '', name: 'Turkish, Ottoman (1500-1928)' },
|
||||
{ code3: 'oto', code2: '', name: 'Otomian languages' },
|
||||
{ code3: 'paa', code2: '', name: 'Papuan languages' },
|
||||
{ code3: 'pag', code2: '', name: 'Pangasinan' },
|
||||
{ code3: 'pal', code2: ' ', name: 'Pahlavi' },
|
||||
{ code3: 'pam', code2: ' ', name: 'Pampanga; Kapampangan' },
|
||||
{ code3: 'pan', code2: 'paPanjabi; Punjabi', name: 'pendjabi' },
|
||||
{ code3: 'pap', code2: ' ', name: 'Papiamento' },
|
||||
{ code3: 'pau', code2: ' ', name: 'Palauan' },
|
||||
{ code3: 'peo', code2: ' ', name: 'Persian, Old (ca.600-400 B.C.)' },
|
||||
{ code3: 'per', code2: 'fa', name: 'Persian' },
|
||||
{ code3: 'phi', code2: ' ', name: 'Philippine languages' },
|
||||
{ code3: 'phn', code2: ' ', name: 'Phoenician' },
|
||||
{ code3: 'pli', code2: 'pi', name: 'Pali' },
|
||||
{ code3: 'pol', code2: 'pl', name: 'Polish' },
|
||||
{ code3: 'pon', code2: ' ', name: 'Pohnpeian' },
|
||||
{ code3: 'por', code2: 'pt', name: 'Portuguese' },
|
||||
{ code3: 'pra', code2: ' ', name: 'Prakrit languages' },
|
||||
{
|
||||
code3: 'pro',
|
||||
code2: ' ',
|
||||
name: 'Provençal, Old (to 1500);Occitan, Old (to 1500)',
|
||||
},
|
||||
{ code3: 'pus', code2: 'ps', name: 'Pushto; Pashto' },
|
||||
{ code3: 'que', code2: 'qu', name: 'Quechua' },
|
||||
{ code3: 'raj', code2: ' ', name: 'Rajasthani' },
|
||||
{ code3: 'rap', code2: ' ', name: 'Rapanui' },
|
||||
{ code3: 'rar', code2: ' ', name: 'Rarotongan; Cook Islands Maori' },
|
||||
{ code3: 'roa', code2: ' ', name: 'Romance languages' },
|
||||
{ code3: 'roh', code2: 'rm', name: 'Romansh' },
|
||||
{ code3: 'rom', code2: ' ', name: 'Romany' },
|
||||
{ code3: 'rum', code2: 'ro', name: 'Romanian; Moldavian; Moldovan' },
|
||||
{ code3: 'ron', code2: 'ro', name: 'Romanian; Moldavian; Moldovan' },
|
||||
{ code3: 'run', code2: 'rn', name: 'Rundi' },
|
||||
{ code3: 'rup', code2: ' ', name: 'Aromanian; Arumanian; Macedo-Romanian' },
|
||||
{ code3: 'rus', code2: 'ru', name: 'Russian' },
|
||||
{ code3: 'sad', code2: ' ', name: 'Sandawe' },
|
||||
{ code3: 'sag', code2: 'sg', name: 'Sango' },
|
||||
{ code3: 'sah', code2: ' ', name: 'Yakut' },
|
||||
{ code3: 'sai', code2: ' ', name: 'South American Indian languages' },
|
||||
{ code3: 'sal', code2: ' ', name: 'Salishan languages' },
|
||||
{ code3: 'sam', code2: ' ', name: 'Samaritan Aramaic' },
|
||||
{ code3: 'san', code2: 'sa', name: 'Sanskrit' },
|
||||
{ code3: 'sas', code2: ' ', name: 'Sasak' },
|
||||
{ code3: 'sat', code2: ' ', name: 'Santali' },
|
||||
{ code3: 'scn', code2: ' ', name: 'Sicilian' },
|
||||
{ code3: 'sco', code2: ' ', name: 'Scots' },
|
||||
{ code3: 'sel', code2: ' ', name: 'Selkup' },
|
||||
{ code3: 'sem', code2: ' ', name: 'Semitic languages' },
|
||||
{ code3: 'sga', code2: ' ', name: 'Irish, Old (to 900)' },
|
||||
{ code3: 'sgn', code2: ' ', name: 'Sign Languages' },
|
||||
{ code3: 'shn', code2: ' ', name: 'Shan' },
|
||||
{ code3: 'sid', code2: ' ', name: 'Sidamo' },
|
||||
{ code3: 'sin', code2: 'si', name: 'Sinhala; Sinhalese' },
|
||||
{ code3: 'sio', code2: ' ', name: 'Siouan languages' },
|
||||
{ code3: 'sit', code2: ' ', name: 'Sino-Tibetan languages' },
|
||||
{ code3: 'sla', code2: ' ', name: 'Slavic languages' },
|
||||
{ code3: 'slo', code2: 'sk', name: 'Slovak' },
|
||||
{ code3: 'slk', code2: 'sk', name: 'Slovak' },
|
||||
{ code3: 'slv', code2: 'sl', name: 'Slovenian' },
|
||||
{ code3: 'sma', code2: ' ', name: 'Southern Sami' },
|
||||
{ code3: 'sme', code2: 'se', name: 'Northern Sami' },
|
||||
{ code3: 'smi', code2: ' ', name: 'Sami languages' },
|
||||
{ code3: 'smj', code2: ' ', name: 'Lule Sami' },
|
||||
{ code3: 'smn', code2: ' ', name: 'Inari Sami' },
|
||||
{ code3: 'smo', code2: 'sm', name: 'Samoan' },
|
||||
{ code3: 'sms', code2: ' ', name: 'Skolt Sami' },
|
||||
{ code3: 'sna', code2: 'sn', name: 'Shona' },
|
||||
{ code3: 'snd', code2: 'sd', name: 'Sindhi' },
|
||||
{ code3: 'snk', code2: ' ', name: 'Soninke' },
|
||||
{ code3: 'sog', code2: ' ', name: 'Sogdian' },
|
||||
{ code3: 'som', code2: 'so', name: 'Somali' },
|
||||
{ code3: 'son', code2: ' ', name: 'Songhai languages' },
|
||||
{ code3: 'sot', code2: 'st', name: 'Sotho, Southern' },
|
||||
{ code3: 'spa', code2: 'es', name: 'Spanish' },
|
||||
{ code3: 'sqi', code2: 'sq', name: 'Albanian' },
|
||||
{ code3: 'srd', code2: 'sc', name: 'Sardinian' },
|
||||
{ code3: 'srn', code2: ' ', name: 'Sranan Tongo' },
|
||||
{ code3: 'srp', code2: 'sr', name: 'Serbian' },
|
||||
{ code3: 'srr', code2: ' ', name: 'Serer' },
|
||||
{ code3: 'ssa', code2: ' ', name: 'Nilo-Saharan languages' },
|
||||
{ code3: 'ssw', code2: 'ss', name: 'Swati' },
|
||||
{ code3: 'suk', code2: ' ', name: 'Sukuma' },
|
||||
{ code3: 'sun', code2: 'su', name: 'Sundanese' },
|
||||
{ code3: 'sus', code2: ' ', name: 'Susu' },
|
||||
{ code3: 'sux', code2: ' ', name: 'Sumerian' },
|
||||
{ code3: 'swa', code2: 'sw', name: 'Swahili' },
|
||||
{ code3: 'swe', code2: 'sv', name: 'Swedish' },
|
||||
{ code3: 'syc', code2: ' ', name: 'Classical Syriac' },
|
||||
{ code3: 'syr', code2: ' ', name: 'Syriac' },
|
||||
{ code3: 'tah', code2: 'ty', name: 'Tahitian' },
|
||||
{ code3: 'tai', code2: ' ', name: 'Tai languages' },
|
||||
{ code3: 'tam', code2: 'ta', name: 'Tamil' },
|
||||
{ code3: 'tat', code2: 'tt', name: 'Tatar' },
|
||||
{ code3: 'tel', code2: 'te', name: 'Telugu' },
|
||||
{ code3: 'tem', code2: ' ', name: 'Timne' },
|
||||
{ code3: 'ter', code2: ' ', name: 'Tereno' },
|
||||
{ code3: 'tet', code2: ' ', name: 'Tetum' },
|
||||
{ code3: 'tgk', code2: 'tg', name: 'Tajik' },
|
||||
{ code3: 'tgl', code2: 'tl', name: 'Tagalog' },
|
||||
{ code3: 'tha', code2: 'th', name: 'Thai' },
|
||||
{ code3: 'tib', code2: 'bo', name: 'Tibetan' },
|
||||
{ code3: 'tig', code2: ' ', name: 'Tigre' },
|
||||
{ code3: 'tir', code2: 'ti', name: 'Tigrinya' },
|
||||
{ code3: 'tiv', code2: ' ', name: 'Tiv' },
|
||||
{ code3: 'tkl', code2: ' ', name: 'Tokelau' },
|
||||
{ code3: 'tlh', code2: ' ', name: 'Klingon; tlhIngan-Hol' },
|
||||
{ code3: 'tli', code2: ' ', name: 'Tlingit' },
|
||||
{ code3: 'tmh', code2: ' ', name: 'Tamashek' },
|
||||
{ code3: 'tog', code2: ' ', name: 'Tonga (Nyasa)' },
|
||||
{ code3: 'ton', code2: 'to', name: 'Tonga (Tonga Islands)' },
|
||||
{ code3: 'tpi', code2: ' ', name: 'Tok Pisin' },
|
||||
{ code3: 'tsi', code2: ' ', name: 'Tsimshian' },
|
||||
{ code3: 'tsn', code2: 'tn', name: 'Tswana' },
|
||||
{ code3: 'tso', code2: 'ts', name: 'Tsonga' },
|
||||
{ code3: 'tuk', code2: 'tk', name: 'Turkmen' },
|
||||
{ code3: 'tum', code2: ' ', name: 'Tumbuka' },
|
||||
{ code3: 'tup', code2: ' ', name: 'Tupi languages' },
|
||||
{ code3: 'tur', code2: 'tr', name: 'Turkish' },
|
||||
{ code3: 'tut', code2: ' ', name: 'Altaic languages' },
|
||||
{ code3: 'tvl', code2: ' ', name: 'Tuvalu' },
|
||||
{ code3: 'twi', code2: 'tw', name: 'Twi' },
|
||||
{ code3: 'tyv', code2: ' ', name: 'Tuvinian' },
|
||||
{ code3: 'udm', code2: ' ', name: 'Udmurt' },
|
||||
{ code3: 'uga', code2: ' ', name: 'Ugaritic' },
|
||||
{ code3: 'uig', code2: 'ug', name: 'Uighur; Uyghur' },
|
||||
{ code3: 'ukr', code2: 'uk', name: 'Ukrainian' },
|
||||
{ code3: 'umb', code2: ' ', name: 'Umbundu' },
|
||||
{ code3: 'und', code2: ' ', name: 'Undetermined' },
|
||||
{ code3: 'urd', code2: 'ur', name: 'Urdu' },
|
||||
{ code3: 'uzb', code2: 'uz', name: 'Uzbek' },
|
||||
{ code3: 'vai', code2: ' ', name: 'Vai' },
|
||||
{ code3: 'ven', code2: 've', name: 'Venda' },
|
||||
{ code3: 'vie', code2: 'vi', name: 'Vietnamese' },
|
||||
{ code3: 'vol', code2: 'vo', name: 'Volapük' },
|
||||
{ code3: 'vot', code2: ' ', name: 'Votic' },
|
||||
{ code3: 'wak', code2: ' ', name: 'Wakashan languages' },
|
||||
{ code3: 'wal', code2: ' ', name: 'Wolaitta; Wolaytta' },
|
||||
{ code3: 'war', code2: ' ', name: 'Waray' },
|
||||
{ code3: 'was', code2: ' ', name: 'Washo' },
|
||||
{ code3: 'wel', code2: 'cy', name: 'Welsh' },
|
||||
{ code3: 'wen', code2: ' ', name: 'Sorbian languages' },
|
||||
{ code3: 'wln', code2: 'wa', name: 'Walloon' },
|
||||
{ code3: 'wol', code2: 'wo', name: 'Wolof' },
|
||||
{ code3: 'xal', code2: ' ', name: 'Kalmyk; Oirat' },
|
||||
{ code3: 'xho', code2: 'xh', name: 'Xhosa' },
|
||||
{ code3: 'yao', code2: ' ', name: 'Yao' },
|
||||
{ code3: 'yap', code2: ' ', name: 'Yapese' },
|
||||
{ code3: 'yid', code2: 'yi', name: 'Yiddish' },
|
||||
{ code3: 'yor', code2: 'yo', name: 'Yoruba' },
|
||||
{ code3: 'ypk', code2: ' ', name: 'Yupik languages' },
|
||||
{ code3: 'zap', code2: ' ', name: 'Zapotec' },
|
||||
{ code3: 'zbl', code2: ' ', name: 'Blissymbols; Blissymbolics; Bliss' },
|
||||
{ code3: 'zen', code2: ' ', name: 'Zenaga' },
|
||||
{ code3: 'zgh', code2: ' ', name: 'Standard Moroccan Tamazight' },
|
||||
{ code3: 'zha', code2: 'za', name: 'Zhuang; Chuang' },
|
||||
{ code3: 'zho', code2: 'zh', name: 'Chinese' },
|
||||
{ code3: 'znd', code2: ' ', name: 'Zande languages' },
|
||||
{ code3: 'zul', code2: 'zu', name: 'Zulu' },
|
||||
{ code3: 'zun', code2: ' ', name: 'Zuni' },
|
||||
{
|
||||
code3: 'zza',
|
||||
code2: '',
|
||||
name: 'Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki',
|
||||
},
|
||||
]
|
||||
|
||||
export const LANGUAGES_MAP_CODE3 = Object.fromEntries(
|
||||
LANGUAGES.map((lang) => [lang.code3, lang]),
|
||||
)
|
||||
|
||||
export function code3ToCode2(lang: string): string {
|
||||
if (lang.length === 3) {
|
||||
return LANGUAGES_MAP_CODE3[lang]?.code2 || lang
|
||||
}
|
||||
return lang
|
||||
}
|
@ -1,7 +1,16 @@
|
||||
import lande from 'lande'
|
||||
|
||||
import { ModerationService } from '.'
|
||||
import { ModSubject } from './subject'
|
||||
import { ModerationSubjectStatusRow } from './types'
|
||||
import { langLogger as log } from '../logger'
|
||||
import { code3ToCode2 } from './lang-data'
|
||||
import {
|
||||
AppBskyActorProfile,
|
||||
AppBskyFeedGenerator,
|
||||
AppBskyFeedPost,
|
||||
AppBskyGraphList,
|
||||
} from '@atproto/api'
|
||||
|
||||
export class ModerationLangService {
|
||||
constructor(private moderationService: ModerationService) {}
|
||||
@ -40,6 +49,23 @@ export class ModerationLangService {
|
||||
}
|
||||
}
|
||||
|
||||
getTextFromRecord(recordValue?: Record<string, unknown>): string | undefined {
|
||||
let text: string | undefined
|
||||
|
||||
if (AppBskyGraphList.isRecord(recordValue)) {
|
||||
text = recordValue.description || recordValue.name
|
||||
} else if (
|
||||
AppBskyFeedGenerator.isRecord(recordValue) ||
|
||||
AppBskyActorProfile.isRecord(recordValue)
|
||||
) {
|
||||
text = recordValue.description || recordValue.displayName
|
||||
} else if (AppBskyFeedPost.isRecord(recordValue)) {
|
||||
text = recordValue.text
|
||||
}
|
||||
|
||||
return text?.trim()
|
||||
}
|
||||
|
||||
async getRecordLang({
|
||||
subject,
|
||||
}: {
|
||||
@ -70,10 +96,17 @@ export class ModerationLangService {
|
||||
])
|
||||
const record = recordByUri.get(subject.uri)
|
||||
const recordLang = record?.value.langs as string[] | null
|
||||
const recordText = this.getTextFromRecord(record?.value)
|
||||
if (recordLang?.length) {
|
||||
recordLang
|
||||
.map((lang) => lang.split('-')[0])
|
||||
.forEach((lang) => langs.add(lang))
|
||||
} else if (recordText) {
|
||||
const detectedLanguages = lande(recordText)
|
||||
if (detectedLanguages.length) {
|
||||
const langCode = code3ToCode2(detectedLanguages[0][0])
|
||||
if (langCode) langs.add(langCode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ Object {
|
||||
"subjectBlobCids": Array [],
|
||||
"subjectRepoHandle": "alice.test",
|
||||
"tags": Array [
|
||||
"lang:und",
|
||||
"lang:en",
|
||||
],
|
||||
"takendown": true,
|
||||
"updatedAt": "1970-01-01T00:00:00.000Z",
|
||||
@ -141,7 +141,7 @@ Object {
|
||||
"subjectBlobCids": Array [],
|
||||
"subjectRepoHandle": "alice.test",
|
||||
"tags": Array [
|
||||
"lang:und",
|
||||
"lang:en",
|
||||
],
|
||||
"takendown": true,
|
||||
"updatedAt": "1970-01-01T00:00:00.000Z",
|
||||
|
@ -152,7 +152,7 @@ Array [
|
||||
"event": Object {
|
||||
"$type": "tools.ozone.moderation.defs#modEventTag",
|
||||
"add": Array [
|
||||
"lang:und",
|
||||
"lang:en",
|
||||
],
|
||||
"remove": Array [],
|
||||
},
|
||||
|
@ -94,7 +94,7 @@ Array [
|
||||
"subjectBlobCids": Array [],
|
||||
"subjectRepoHandle": "alice.test",
|
||||
"tags": Array [
|
||||
"lang:und",
|
||||
"lang:ha",
|
||||
],
|
||||
"takendown": false,
|
||||
"updatedAt": "1970-01-01T00:00:00.000Z",
|
||||
|
109
packages/ozone/tests/lang.test.ts
Normal file
109
packages/ozone/tests/lang.test.ts
Normal file
@ -0,0 +1,109 @@
|
||||
import {
|
||||
ModeratorClient,
|
||||
SeedClient,
|
||||
TestNetwork,
|
||||
basicSeed,
|
||||
} from '@atproto/dev-env'
|
||||
import AtpAgent from '@atproto/api'
|
||||
import { REASONSPAM } from '../src/lexicon/types/com/atproto/moderation/defs'
|
||||
|
||||
describe('moderation status language tagging', () => {
|
||||
let network: TestNetwork
|
||||
let agent: AtpAgent
|
||||
let sc: SeedClient
|
||||
let modClient: ModeratorClient
|
||||
|
||||
beforeAll(async () => {
|
||||
network = await TestNetwork.create({
|
||||
dbPostgresSchema: 'ozone_blob_divert_test',
|
||||
ozone: {
|
||||
blobDivertUrl: `https://blob-report.com`,
|
||||
blobDivertAdminPassword: 'test-auth-token',
|
||||
},
|
||||
})
|
||||
agent = network.pds.getClient()
|
||||
sc = network.getSeedClient()
|
||||
modClient = network.ozone.getModClient()
|
||||
await basicSeed(sc)
|
||||
await network.processAll()
|
||||
})
|
||||
|
||||
afterAll(async () => {
|
||||
await network.close()
|
||||
})
|
||||
|
||||
const getStatus = async (subject: string) => {
|
||||
const { subjectStatuses } = await modClient.queryStatuses({
|
||||
subject,
|
||||
})
|
||||
|
||||
return subjectStatuses[0]
|
||||
}
|
||||
|
||||
it('Adds language tag to post from text', async () => {
|
||||
const createPostAndReport = async (text: string) => {
|
||||
const post = await sc.post(sc.dids.carol, text)
|
||||
await network.processAll()
|
||||
const report = await sc.createReport({
|
||||
reasonType: REASONSPAM,
|
||||
subject: {
|
||||
$type: 'com.atproto.repo.strongRef',
|
||||
uri: post.ref.uriStr,
|
||||
cid: post.ref.cidStr,
|
||||
},
|
||||
reportedBy: sc.dids.alice,
|
||||
})
|
||||
|
||||
return { post, report }
|
||||
}
|
||||
const [japanesePost, greekPost] = await Promise.all([
|
||||
createPostAndReport('Xで有名な人達+反AIや絵描きによくない'),
|
||||
createPostAndReport(
|
||||
'Λορεμ ιπσθμ δολορ σιτ αμετ, μει θτ vιδιτ νοστρθμ προπριαε',
|
||||
),
|
||||
])
|
||||
|
||||
const [japanesePostStatus, greekPostStatus] = await Promise.all([
|
||||
getStatus(japanesePost.post.ref.uriStr),
|
||||
getStatus(greekPost.post.ref.uriStr),
|
||||
])
|
||||
|
||||
expect(japanesePostStatus.tags).toContain('lang:ja')
|
||||
expect(greekPostStatus.tags).toContain('lang:el')
|
||||
})
|
||||
|
||||
it('Uses name/description text for language tag for list', async () => {
|
||||
const createListAndReport = async (name: string, description?: string) => {
|
||||
const list = await sc.createList(sc.dids.carol, name, 'mod', {
|
||||
description,
|
||||
})
|
||||
await network.processAll()
|
||||
const report = await sc.createReport({
|
||||
reasonType: REASONSPAM,
|
||||
subject: {
|
||||
$type: 'com.atproto.repo.strongRef',
|
||||
uri: list.uriStr,
|
||||
cid: list.cidStr,
|
||||
},
|
||||
reportedBy: sc.dids.alice,
|
||||
})
|
||||
return { list, report }
|
||||
}
|
||||
|
||||
const [listWithDescription, listWithoutDescription] = await Promise.all([
|
||||
createListAndReport(
|
||||
'よくない',
|
||||
'Xで有名な人達+反AIや絵描きによくない感情を持つ人達+絵描き詐称',
|
||||
),
|
||||
createListAndReport('人達+反AIや絵描きによくない感情'),
|
||||
])
|
||||
|
||||
const [japaneseListStatus, chineseListStatus] = await Promise.all([
|
||||
getStatus(listWithDescription.list.uriStr),
|
||||
getStatus(listWithoutDescription.list.uriStr),
|
||||
])
|
||||
|
||||
expect(japaneseListStatus.tags).toContain('lang:ja')
|
||||
expect(chineseListStatus.tags).toContain('lang:ja')
|
||||
})
|
||||
})
|
@ -167,7 +167,7 @@ Array [
|
||||
"event": Object {
|
||||
"$type": "tools.ozone.moderation.defs#modEventTag",
|
||||
"add": Array [
|
||||
"lang:und",
|
||||
"lang:en",
|
||||
],
|
||||
"remove": Array [],
|
||||
},
|
||||
@ -221,7 +221,7 @@ Object {
|
||||
"subjectBlobCids": Array [],
|
||||
"subjectRepoHandle": "bob.test",
|
||||
"tags": Array [
|
||||
"lang:und",
|
||||
"lang:en",
|
||||
],
|
||||
"takendown": false,
|
||||
"updatedAt": "1970-01-01T00:00:00.000Z",
|
||||
|
13
pnpm-lock.yaml
generated
13
pnpm-lock.yaml
generated
@ -583,6 +583,9 @@ importers:
|
||||
kysely:
|
||||
specifier: ^0.22.0
|
||||
version: 0.22.0
|
||||
lande:
|
||||
specifier: ^1.0.10
|
||||
version: 1.0.10
|
||||
multiformats:
|
||||
specifier: ^9.9.0
|
||||
version: 9.9.0
|
||||
@ -8767,6 +8770,12 @@ packages:
|
||||
resolution: {integrity: sha512-TH+b56pVXQq0tsyooYLeNfV11j6ih7D50dyN8tkM0e7ndiUH28Nziojiog3qRFlmEj9XePYdZUrNJ2079Qjdow==}
|
||||
engines: {node: '>=14.0.0'}
|
||||
|
||||
/lande@1.0.10:
|
||||
resolution: {integrity: sha512-yT52DQh+UV2pEp08jOYrA4drDv0DbjpiRyZYgl25ak9G2cVR2AimzrqkYQWrD9a7Ud+qkAcaiDDoNH9DXfHPmw==}
|
||||
dependencies:
|
||||
toygrad: 2.6.0
|
||||
dev: false
|
||||
|
||||
/leven@3.1.0:
|
||||
resolution: {integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==}
|
||||
engines: {node: '>=6'}
|
||||
@ -10708,6 +10717,10 @@ packages:
|
||||
'@tokenizer/token': 0.3.0
|
||||
ieee754: 1.2.1
|
||||
|
||||
/toygrad@2.6.0:
|
||||
resolution: {integrity: sha512-g4zBmlSbvzOE5FOILxYkAybTSxijKLkj1WoNqVGnbMcWDyj4wWQ+eYSr3ik7XOpIgMq/7eBcPRTJX3DM2E0YMg==}
|
||||
dev: false
|
||||
|
||||
/tr46@0.0.3:
|
||||
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
||||
dev: true
|
||||
|
Loading…
x
Reference in New Issue
Block a user