#!/usr/bin/env perl
# Unit tests for the string helpers of Krawfish::Util::String:
# fold_case, remove_diacritics, squote and unsquote.
use strict;
use warnings;
use Test::More;
use utf8;    # The string literals below are written in UTF-8.
use Mojo::Util qw/encode decode/;

# Load the module at runtime. The helpers are called unqualified below,
# so they are presumably part of the module's default export list.
use_ok('Krawfish::Util::String');

# fold_case: plain ASCII input is lower-cased.
is(fold_case('aaa'), 'aaa', 'Case fold 1');
is(fold_case('AAA'), 'aaa', 'Case fold 2');
is(fold_case('AaA'), 'aaa', 'Case fold 3');

# fold_case: umlauts are lower-cased and sharp s is expanded to "ss".
is(fold_case('aäa'), 'aäa', 'Case fold 4');
is(fold_case('aÄß'), 'aäss', 'Case fold 5');
is(fold_case('a-Äß'), 'a-äss', 'Case fold 6');
is(fold_case('ÄÖÜß'), 'äöüss', 'Case fold 7');

# remove_diacritics: accents are dropped, sharp s is left untouched.
is(remove_diacritics('Česká'), 'Ceska', 'Removed diacritics');
is(remove_diacritics('Äößa'), 'Aoßa', 'Removed diacritics');

# From comment in http://archives.miloush.net/michkap/archive/2007/05/14/2629747.html
is(remove_diacritics('ÅåÄäÖö'), 'AaAaOo', 'Check swedish');
# Krawfish::Util::String::_list_props('Łł');
is(remove_diacritics('ĄąĆćĘęŁłŃńÓóŚśŹźŻż'), 'AaCcEeLlNnOoSsZzZz', 'Check polish');
is(remove_diacritics('ľščťžýáíéúäôňďĽŠČŤŽÝÁÍÉÚÄÔŇĎ'), 'lsctzyaieuaondLSCTZYAIEUAOND', 'Check slowakish');
is(remove_diacritics('ëőüűŐÜŰ'), 'eouuOUU', 'Check hungarian');
is(remove_diacritics('Ññ¿'), 'Nn¿', 'Check spanish');
is(remove_diacritics('àèòçï'), 'aeoci', 'Check CA?');
is(remove_diacritics('ı'), 'i', 'Check turkish');

# From http://stackoverflow.com/questions/249087/how-do-i-remove-diacritics-accents-from-a-string-in-net#249126
is(remove_diacritics('äáčďěéíľľňôóřŕšťúůýž'), 'aacdeeillnoorrstuuyz',
   'Check lower-case Central European letters');
is(remove_diacritics('ÄÁČĎĚÉÍĽĽŇÔÓŘŔŠŤÚŮÝŽ'), 'AACDEEILLNOORRSTUUYZ',
   'Check upper-case Central European letters');
is(remove_diacritics('ÖÜË'), 'OUE', 'Check upper-case diaeresis');
is(remove_diacritics('łŁđĐ'), 'lLdD', 'Check stroked letters');
is(remove_diacritics('ţŢşŞçÇ'), 'tTsScC', 'Check cedilla letters');
is(remove_diacritics('øı'), 'oi', 'Check stroked o and dotless i');

# Whole sentences: only the accented letters change, punctuation and
# spacing stay as they are.
is(remove_diacritics(
  q{Bonjour ça va? C'est l'été! Ich möchte ä Ä á à â ê é è ë Ë É ï Ï î í ì ó ò ô ö Ö Ü ü ù ú û Û ý Ý ç Ç ñ Ñ}),
   q{Bonjour ca va? C'est l'ete! Ich mochte a A a a a e e e e E E i I i i i o o o o O U u u u u U y Y c C n N},
   'Check mixed French and German sentence');

# https://docs.seneca.nl/Smartsite-Docs/Features-Modules/Add-On_Modules/Faceted_Search/FS_Reference/FTS_and_iFTS_technical_background_information/Diacritics_and_Unicode.html
# Character table: ASCII and symbols pass through unchanged; accented
# letters lose their marks, while Æ, ×, Þ, ß, æ, ð, ÷ and þ are kept.
is(remove_diacritics(
  q/!"#$'()*+,-.0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` abcdefghijklmnoprstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿−ÀÁÂ ÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ/),
   q/!"#$'()*+,-.0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` abcdefghijklmnoprstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿−AAA AAAÆCEEEEIIIIDNOOOOO×OUUUUYÞßaaaaaaæceeeeiiiiðnooooo÷ouuuuyþy/,
   'Check Latin-1 character table');

# squote wraps a string in single quotes and backslash-escapes embedded
# quotes and backslashes; unsquote reverses that.
is(squote("baum"), "'baum'", 'Check quote');
is(squote("that's"), "'that\\'s'", 'Check quote');
is(squote("that\\'s"), "'that\\\\\\'s'", 'Check quote');
is(unsquote("'that\\\\\\'s'"), "that\\'s", 'Check unquote');
is(unsquote("'that\\'s'"), "that's", 'Check unquote');
is(unsquote("'baum'"), "baum", 'Check unquote');

done_testing;
__END__
