diff --git a/src/utils/language.test.ts b/src/utils/language.test.ts index 255f6b58..b9563bf0 100644 --- a/src/utils/language.test.ts +++ b/src/utils/language.test.ts @@ -1,28 +1,69 @@ import { detectLanguage } from '@/utils/language.ts'; -import { assertEquals } from '@std/assert'; +import { assertEquals, assertNotEquals } from '@std/assert'; +import { Conf } from '@/config.ts'; -Deno.test('Detect English language', () => { - assertEquals(detectLanguage(``, 0.90), undefined); - assertEquals(detectLanguage(`Good morning my fellow friends`, 0.90), 'en'); - assertEquals( - detectLanguage( - `Would you listen to Michael Jackson's songs?\n\nnostr:nevent1qvzqqqqqqypzqprpljlvcnpnw3pejvkkhrc3y6wvmd7vjuad0fg2ud3dky66gaxaqyvhwumn8ghj7cm0vfexzen4d4sjucm0d5hhyetvv9usqg8htx8xcjq7ffrzxu7nrhlr8vljcv6gpmet0auy87mpj6djxk4myqha02kp`, - 0.90, - ), - 'en', - ); - assertEquals( - detectLanguage( - `https://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uWould you listen to Michael Jackson's songs?\n\nnostr:nevent1qvzqqqqqqypzqprpljlvcnpnw3pejvkkhrc3y6wvmd7vjuad0fg2ud3dky66gaxaqyvhwumn8ghj7cm0vfexzen4d4sjucm0d5hhyetvv9usqg8htx8xcjq7ffrzxu7nrhlr8vljcv6gpmet0auy87mpj6djxk4myqha02kp`, - 0.90, - ), - 'en', - ); - assertEquals( - detectLanguage( - `https://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_u đŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸Ž https://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_u Would you listen to Michael Jackson's songs?\n\nnostr:nevent1qvzqqqqqqypzqprpljlvcnpnw3pejvkkhrc3y6wvmd7vjuad0fg2ud3dky66gaxaqyvhwumn8ghj7cm0vfexzen4d4sjucm0d5hhyetvv9usqg8htx8xcjq7ffrzxu7nrhlr8vljcv6gpmet0auy87mpj6djxk4myqha02kp`, - 0.90, - ), - 'en', - ); +Deno.test('Tests for language detection', async (t) => { + await t.step('Empty string should return undefined', async () => { + assertEquals(await detectLanguage(``, 0.90), undefined); + }); + + await t.step('Regular English string should be detected', async () => { + assertEquals(await detectLanguage(`Good morning my fellow friends`, 0.90), 'en'); + }); + + await t.step('nostr event id should be ignored', async () => { + assertEquals( + await detectLanguage( + `Would you listen to Michael Jackson's songs?\n\nnostr:nevent1qvzqqqqqqypzqprpljlvcnpnw3pejvkkhrc3y6wvmd7vjuad0fg2ud3dky66gaxaqyvhwumn8ghj7cm0vfexzen4d4sjucm0d5hhyetvv9usqg8htx8xcjq7ffrzxu7nrhlr8vljcv6gpmet0auy87mpj6djxk4myqha02kp`, + 0.90, + ), + 'en', + ); + }); + + await t.step('URLs should be ignored', async () => { + assertEquals( + await detectLanguage( + `https://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uWould you listen to Michael Jackson's songs?\n\nnostr:nevent1qvzqqqqqqypzqprpljlvcnpnw3pejvkkhrc3y6wvmd7vjuad0fg2ud3dky66gaxaqyvhwumn8ghj7cm0vfexzen4d4sjucm0d5hhyetvv9usqg8htx8xcjq7ffrzxu7nrhlr8vljcv6gpmet0auy87mpj6djxk4myqha02kp`, + 0.90, + ), + 'en', + ); + }); + + await t.step('Emoji should be ignored', async () => { + assertEquals( + await detectLanguage( + `https://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_u đŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸ŽđŸ˜‚đŸ’¯â™ĄâŒ¨ī¸Ž https://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_uhttps://youtu.be/FxppefYTA2I?si=grgEpbEhFu_-3V_u Would you listen to Michael Jackson's songs?\n\nnostr:nevent1qvzqqqqqqypzqprpljlvcnpnw3pejvkkhrc3y6wvmd7vjuad0fg2ud3dky66gaxaqyvhwumn8ghj7cm0vfexzen4d4sjucm0d5hhyetvv9usqg8htx8xcjq7ffrzxu7nrhlr8vljcv6gpmet0auy87mpj6djxk4myqha02kp`, + 0.90, + ), + 'en', + ); + }); + + await t.step('The horrific problem sentence', async () => { + switch (Conf.languageDetector) { + case 'lingua': + assertEquals( + await detectLanguage(`It may die when I die, and that's okay. It's my earnings.`, 0.90), + 'en', + ); + break; + default: + assertNotEquals( + await detectLanguage(`It may die when I die, and that's okay. It's my earnings.`, 0.90), + 'en', + ); + break; + } + }); + + await t.step('The horrific problem sentence', async () => { + const tester = Conf.languageDetector === 'lingua' ? assertEquals : assertNotEquals; + tester(await detectLanguage(`It may die when I die, and that's okay. It's my earnings.`, 0.90), 'en'); + }); + // + await t.step('Should detect Hindi sentences', async () => { + assertEquals(await detectLanguage(`ā¤ŽāĨˆ ā¤Ąā¤ŋ⤟āĨā¤ŸāĨ‹ ⤕āĨ€ ⤍⤝āĨ€ ⤅⤍āĨā¤ĩā¤žā¤Ļ ⤏āĨā¤ĩā¤ŋā¤§ā¤ž ⤕āĨ‹ ⤆āĨ›ā¤Žā¤ž ā¤°ā¤šā¤ž ā¤šāĨ‚⤁`, 0.80), 'hi'); + }); });