diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c568586 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +# Variables +nim = nim +nim_flags = c -d:useMalloc --mm:arc -d:debug --maxLoopIterationsVM:20000000 --app:staticlib --o:build/libzipfs-ld.a +nim_file = nim/main + +# Default target +all:./build/libzipfs-ld.a + +# Build target +./build/libzipfs-ld.a: + nim $(nim_flags) $(nim_file) diff --git a/build/libzipfs-ld.a b/build/libzipfs-ld.a new file mode 100644 index 0000000..521fcc8 Binary files /dev/null and b/build/libzipfs-ld.a differ diff --git a/build/zipfs-ld.h b/build/zipfs-ld.h new file mode 100644 index 0000000..7055755 --- /dev/null +++ b/build/zipfs-ld.h @@ -0,0 +1,12 @@ + +#include <bits/floatn-common.h> +#include <stdbool.h> +void zipfs_language_detector( + char** languages, + uint64_t languages_count, + char* input_string, + bool* successful, + uint64_t* length_output, + float** result_buffer_float, + char*** result +); diff --git a/ctest/a.out b/ctest/a.out new file mode 100755 index 0000000..4ac2555 Binary files /dev/null and b/ctest/a.out differ diff --git a/ctest/test.c b/ctest/test.c new file mode 100644 index 0000000..68a4fdf --- /dev/null +++ b/ctest/test.c @@ -0,0 +1,37 @@ +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> + +#include "./zipfs-ld.h" +#include <stdlib.h> +int main(){ + char* languages[] = {"sv", "en", "de", "nl"}; + char* test_string = " De vragen en manier waarop ze zijn opgesteld, zijn een nieuw niveau van slecht. . "; + uint64_t language_count = 3; + bool worked = false; + uint64_t length_output; + char** result; + float* float_result; + int i; + for (i = 0; i != 10; i++){ + zipfs_language_detector( + languages, + language_count, + test_string, + &worked, + &length_output, + &float_result, + &result + ); + int s = 0; + for (s = 0; s != length_output+1; s++){ + printf("%s\n", result[s]); + printf("%f\n", float_result[s]); + free(result[s]); + printf("s: %i\n", s); + }; + free(result); + free(float_result); + } + +} diff --git a/data/.gitignore b/data/.gitignore index 1626b0f..8b13789 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -1 +1 @@ -mostCommonWords.json + diff --git a/data/mostCommonWords.json b/data/mostCommonWords.json new file mode 100644 index 0000000..8343d85 --- /dev/null +++ b/data/mostCommonWords.json @@ -0,0 +1 @@ +{"ta":["slim","slide","ectu","hority","gymnastics","fugue","ipl","bist","cyclopedia","ns","onferen","exact","spacecraft","ronment","eeri","ovae","ublicati","ristophe","rchive","dime","geohack","lectri","plough","electronic","chnolog","sda","rror","phical","cio","ountri","amaranthus","hemical","nox","ineeri","cientifi","chnolo","unan","ricinf","gott","ltip","chem","stanfo","mica","bx","nalini","cliffor","nology","achelo","definin","roperties"],"yi":["moria","leyenda","spritz","datal","nguill","الأسد","الملوك","nationalism","ostarrîchi","chivas","macnuen","esprit","esnude","tammúz","camino","monsie","siván","treisen","dengizi","brivla","yiddish","ewb","قابس","kitzbüheler","busters","wrap","sodome","héodor","jdn","huomenta","eauti","imu","łužica","poemos","owari","mathema","fry","aufmachen","riso","ghtsee","manor","urity","clupea","winds","woody","xnet","propert","rtho","ulah","lh"],"he":["tcm","apple","diss","preface","raw","oloc","waze","holoc","antarctica","kiss","ambr","collec","cambr","wall","vod","ervie","ness","ted","lászló","philology","glaad","macmilla","virgini","ory","productions","wii","depe","ap","ntarctic","tain","politi","nsa","ynet","antarct","dirty","barber","ufc","hautes","arad","shape","id","arta","bucureşti","nothing","fox","voice","stern","msk","sica","scar"],"ny":["pereka","chedw","otchedw","anasiyan","otsut","zinthu","komwe","mavuto","bwino","dziwik","aphunzir","siyanasi","otsatir","otchedwa","yambiri","osiyanasiyan","atsopano","mtsogoleri","mtsogo","mayiko","katemera","zambiri","apezeka","oyambir","phunz","pamwa","sogoleri","aphunz","gwirizan","osiyanasiyana","tsatira","alandir","phunzi","matendawa","kateme","katemer","chachikulu","unzir","alandira","aperek","tchedwa","tsopano","mtsogoler","yoyamba","pamene","adziwik","ogoler","hunzir","ogoleri","matendaw"],"ur":["برنس","مشن","امنز","سفارت","ماخذ","داوواغ","رشل","متعال","وقوع","دلدل","مقامات","جذبات","طوفان","وااور","حوقل","جرمن","بنتا","زونز","الماجد","مارلن","تناسب","لادن","جانتا","حقدار","امانو","رنز","اختتام","معاملات","اآغاز","نافذ","وجاتا","محروم","ثبوت","صفحات","دارومدار","مصنوعات","ولنا","اجرام","تسر","اقتدار","منفرد","جاتا","تشدد","مندرجات","علامت","آج","خشوع","مطابقت","اضلاع","طرز"],"cs":["hrál","dále","během","příkla","mistrovstv","mezináro","později","mistrovství","istrovs","uvisejí","související","ství","říkl","která","další","ození","přík","narozen","visej","pozdě","místo","isejí","vodní","které","působ","původ","ástí","souvise","uvisejíc","místě","působi","ální","apříklad","mistrovst","ební","ející","trovst","apříkla","stoupi","ouvise","externí","tvoř","ůso","například","její","který","visejíc","uvise","narození","strovst"],"da":["lykkedes","næv","sætning","kendt","henvisn","begynd","dspill","fhæ","blive","bliver","envisn","udgivet","århundrede","floden","envisnin","blevet","blandt","yndte","ætni","yndt","findes","vandt","henvi","ælde","envisninger","begyndte","løbet","række","henvisninger","brugt","fodbold","henvisninge","spillede","gøre","rksomhed","ferencer","egyndt","henvis","indenfor","begyndt","forskellige","højere","erefter","orskelli","udover","egynd","dæk","envisni","desuden","henvisnin"],"mt":["bħal","storja","biex","zzjonijiet","nħa","ħruf","ngħa","reże","għruf","pajjiż","mhux","magħru","inħa","magħ","oħra","żmien","ġju","iżda","ħadd","ħand","madwar","ħaj","għal","tagħhom","bħala","għan","oġi","ħal","tiegħu","ħan","għru","żewġ","ħru","għand","kienu","ħafna","ingħ","għm","inħ","magħruf","agħru","għad","ingħa","għandu","dizzjo","tagħha","ngħ","oloġi","ġin","wkoll"],"sm":["ausetalia","faatasi","afaaso","aitaim","alafaasolopit","setete","faasol","maoaiga","aoao","amaoai","iunaite","usetali","nofoa","lolagi","talafaasolopito","aoaoga","auseta","tuaoi","galue","auina","lalolag","lautele","faavae","aofai","faava","faasolopito","itulagi","lalolagi","amaoa","vaega","nofoaga","ausetali","alafaasolopito","tausaga","amaoaig","aoai","faav","tamaoa","tamaoaiga","aoaig","usetal","aloaia","maoai","asolopit","faiga","auaig","alolag","alafaasol","asolopito","faitau"],"az":["muşdu","isələr","olmuşdu","müharibəsi","ətləri","qüvv","mühari","müharibə","qədər","mühar","çox","tərəfindən","olmuşdur","hüseyn","disələ","əsgəri","yerlə","uşdu","əlaqələr","ərəfind","disəl","yerləş","fində","aktyoru","adisələr","mişdi","görə","erləş","muşdur","müha","rəfindən","ühari","erlə","əfindən","əhərin","üharibəsi","tərəfin","ərəfin","lmuşd","ərəfində","ngiltə","indən","əsini","rləş","gürcüstan","müharib","tərəfind","xüsusi","uşdur","ildən"],"hi":["ndian","learned","painted","ntfs","upanis","ide","nsio","nsl","root","sansk","emotional","rammes","slat","encyc","ordea","ajay","palgra","diagram","munic","aphi","nishads","pain","sanskr","ynthes","drawing","naoh","pmos","ychology","nskrit","key","electrical","robotic","psyc","onary","environmenta","guages","ieee","vernment","complet","swat","sony","truths","ks","turner","ancial","promote","pru","bhubaneswar","ictiona","cloth"],"te":["isni","sometim","makers","buildings","ranslatio","bead","unaspirated","vayu","avou","kharia","gce","croforma","lakshmi","omeopat","sylvestris","nates","pply","ractur","hosting","abroad","ittoo","sigma","barnes","gowd","drdo","soviets","pounded","telangana","ebrati","ludin","hex","appro","cpm","honduras","lithuania","penden","wherein","honorary","eprin","etext","fluori","avice","opposition","arieties","sl","wałęsa","wheat","aurobindo","llanth","ramakrishna"],"si":["shoul","onpr","esident","onsider","mmo","anim","breeding","eliev","ounci","statis","inee","establishment","prepar","uddhist","ecom","ahubudu","oppositi","tsca","versions","listen","lowed","sinhala","sisted","rated","radha","xception","francisc","dhis","contain","latform","actice","sociation","taining","ranslati","worl","schol","urbino","literary","aracte","lowe","ondu","meroon","itted","ijayanagar","udesana","ology","anima","ontains","function","carrying"],"tl":["maramin","yroon","digmaa","pamamagi","kanilang","ikatlong","ababaihan","digmaang","tagpuan","katulad","agtiba","pagtatay","maraming","bumuo","digmaan","magitan","mamagi","ipinangana","kalahok","dalawang","panlabas","amagit","amamagitan","nagkakaisang","atagpua","kaharian","pamamagita","anilang","panlupa","tatagpu","ndaigdig","daigdig","pamamagitan","maaari","itinatag","lumipad","pumatay","pandaigdig","kakaisan","pinuno","mayroon","igdig","maaaring","pamamagit","pamamag","nanganak","atatagpu","anlabas","ipinanganak","rehiyong"],"sr":["centar","zofskom","olji","drugim","врс","zavisi","fabrike","zemlje","teorije","kacija","ogrami","umevanje","engl","iams","ун","sjedinjenim","kulsku","еру","olj","nauke","samih","veoma","zajedno","fakul","гр","pokriva","anomal","whitley","enika","vij","zgrada","nebojša","koraka","serija","баск","storijs","raxacu","randall","cited","živi","лп","crevo","oljašn","kila","лен","vojska","сп","njego","periodu","hemijsku"],"bg":["бан","оск","ac","vanilla","мир","wwe","tional","нбу","stay","дв","ик","ним","онд","нта","jenny","mérite","еп","hms","данс","lgari","ск","ng","informat","ди","вас","сем","deep","бна","аел","рмс","пан","purple","girlfr","оф","уи","palazzo","вец","сас","non","ovw","ит","ам","одс","шк","нс","ilde","кеш","мани","су","оу"],"ps":["درلوده","درلود","حقوقي","لرغوني","ترسره","لومتره","ددغه","ولسمشر","موزيم","برخه","نيولو","هيله","طالبانو","هيواد","سياحت","خداى","رمختللي","ساتل","شعرونه","لرغونتيا","ننداره","موندل","مرسته","هغه","مشران","هنرونو","مشرانو","شومان","ييزو","وهنتون","مخه","وروسته","ملتونو","دويم","هكله","ارونه","ولسي","ورسره","تاييد","خواني","ماشومان","ادونه","موخه","دهغه","هرني","سويل","هغو","استازي","بوخت","ترلاسه"],"hu":["szerint","része","emély","gyze","egyé","zető","yzetek","több","született","további","egyik","külön","forráso","ülés","között","mély","gyzetek","jegyzet","tkez","gyzete","rráso","együ","artoz","kerül","együtt","jegyze","zületett","talál","tovább","személ","zemél","ületett","később","jegyzetek","által","jegyzete","vissza","terület","zemély","szemé","egyzete","orrások","tották","források","rrások","gyzet","álto","egyze","születe","vissz"],"ca":["escriur","inclou","referències","erència","església","període","havien","rèncie","jançant","xarxa","sglés","enllaços","alitza","aquests","èix","segü","mitjançant","espècie","següe","mateix","alsevo","enllaç","perquè","escriure","enllaço","sglési","mateixa","omençar","anomenad","vegeu","procés","assolir","següent","olímpics","eferènc","això","utilitz","començar","esglés","nllaço","ferències","esglési","només","rganitza","referèncie","sglé","erèncie","qualsevol","referènc","aconseguir"],"mk":["сос","мс","смо","кц","ме","епо","кт","хив","пу","mgh","маа","ре","мвр","вта","plastic","нов","она","аа","нуб","охо","авс","ртв","еу","оа","са","мит","ар","пом","акед","ета","театар","ср","атп","номс","ев","епа","ои","сè","адо","евро","дом","икт","нт","ману","ор","ко","улога","раф","мрт","ек"],"el":["ψει","φτασε","μεση","νωση","βδ","γκου","μπαρ","γκυ","νοδο","λοι","νειρο","θεσ","ννο","σμιο","πειτα","εδα","ρχισε","εκπα","διου","ναντι","μεινε","τομο","γινε","πλην","ντολ","ζιλ","ργου","ρχοντα","κοπε","ζακ","λλων","θων","δωσε","λυτο","νιου","μετο","γιου","κρου","ντελ","νωσε","λγεβρα","μουρ","χθη","νθη","κτε","προφ","λλοτε","φεση","λωση","τοντ"],"es":["utbolistas","lmaré","varían","oficina","explicó","comenzó","calidades","ubicado","mujer","hombres","jugó","abandonó","encuentr","anunció","ncuent","llegó","intérprete","almaré","ubicada","pobreza","lreded","semanario","localidades","tenían","ocupó","había","véase","dirigió","mienzos","equipo","trabajador","lanzada","mencionada","amplios","umbral","areció","lanzado","finalizó","alrededores","corrió","tenía","nombrado","habrían","obtuvo","pusieron","cápita","encuentra","futbolistas","ncuentr","acuerdo"],"ar":["تسعة","أقل","بميداليات","وأصبح","ولمبيون","أحداثها","المتعلقة","رياضيون","الصيفية","أطلق","إحدى","نيويورك","الشرطة","يوجد","مباراة","بميدال","إلى","إسب","بشأن","البحث","إضافة","أفريقيا","أسرة","إيطاليا","راليو","تقع","المختلفة","رجالية","لتالي","إسبان","أستراليا","إسبانيا","بميد","خريجو","السلطة","لهذا","تحتوي","منذ","أولمبية","الأخرى","الأكبر","عندما","لأولمبية","أولمبيون","أولمبيو","أخذ","مبيون","فإن","لإسبا","إنجلترا"],"it":["rattem","tenuti","avviene","migliore","vennero","ggiunse","ritornò","bambino","ottenuto","esordio","ridurre","almeno","ottenne","passata","piuttosto","sposò","copertura","cortometraggio","utilizzato","imparare","furono","prigioniero","rimase","intitolato","poiché","discesa","custodia","condizioni","igliori","introdotto","rubare","uccisi","allievi","controllo","spettatori","rifiutò","interrotta","seguito","gliore","ottenere","collaborazione","fratte","enominata","iniziò","distretto","tilizzato","giocatori","giocator","tornato","lizzato"],"bs":["džamija","pogledajte","prenose","manlijs","regija","alternativne","vrbasa","bjekata","živjela","bjekat","hiljade","neuspjeh","sposobno","živim","vrštena","naseljenim","nalazili","obrnuto","turneja","livnu","dovela","podigao","izrada","zaliva","ogledajte","jabuka","edajte","takvom","osmanlijske","smanlijsk","ledajte","osoblja","dužina","hemiji","grebena","pogledajt","verzija","uzgoj","pobjedu","hiljadu","daljena","pogleda","ogledajt","pružaju","proteže","itavog","masleša","pogledaj","objeka","objekata"],"sv":["ölja","medelhöjden","tillhör","krevs","ppströms","släkt","vånare","lavrinnin","öljande","kvadratkilo","atteny","beskrevs","avrinnin","vudsakli","lhöjd","kvadratkilometer","huvud","gsområdet","delavrinningsområde","uvuds","flödar","avrinningsområdet","länkar","huvuds","rinnings","släkte","huvudsak","vrinningsområde","utlopp","kvadratkilom","inningsomr","enligt","ttendrag","räknas","källor","vattnet","ingår","avrinningsområde","invånare","nningsområde","kvadratkilome","insjöar","innebär","eskrevs","följan","smhi","rinningsområd","uppströms","llhör","släktet"],"am":["unami","work","zedit","sur","olines","lando","bbl","geezed","lgache","eezedi","letter","lled","zazaki","stanford","ublished","nati","ictionary","unum","icati","ezedit","kimoja","saňa","gineeri","ezedi","gusmo","deveno","subd","nachbin","ezed","bc","inato","party","lack","website","holi","volas","logical","university","versit","hogbo","digno","expe","nowle","march","poseda","holiness","viwili","gical","com","lingvo"],"sk":["cúz","neskôr","ktorých","pretože","adateľ","pôvo","yšlien","ancúz","ancúzs","šetký","chádza","ádza","francúzs","teľo","eľov","vateľ","všet","francúz","ateľ","rancúzs","podľa","osť","eľo","skôr","tiež","pozri","všetk","teľ","rancúz","cúzs","rokoch","asť","úzsk","najmä","torých","pôvod","externé","ktorého","rancúzsk","cúzsk","chád","môže","vplyv","ujúc","dielo","ateľo","pôsobil","torýc","ancúzsk","chádz"],"tk":["eýd","üný","ndaky","ilýä","döwlet","riniň","aýy","anýar","lupdy","laýa","ýet","updyr","bolýar","ýyl","giň","boýun","oýu","iýet","ýerle","ýyll","iýa","laý","ýat","iýe","oýun","rýä","bolý","ýylda","ýaş","anyň","aýd","onuň","soň","öwl","öwle","döwl","siýa","olupd","lupdyr","edýär","ýän","ýyly","lýär","esasy","ýer","ýaly","şähe","ýerl","döwle","ýasy"],"jv":["anggita","olèh","préntahan","lumrahé","ambakipu","loroné","atunggal","wekdal","ratéla","yambakip","pratéla","sawisé","njenengan","satunggal","piyambak","wijining","anadya","tumuju","déning","anjenengan","delengen","manèh","aprént","awijining","ratélan","atélan","péranga","sanès","misuwur","sawijini","akèh","atéla","pérangan","papréntahan","dhéwé","bakipu","kaliyan","piyamb","yambaki","akipun","duwé","laladan","wiwitan","unakaké","praté","piyambakipun","satungga","dhèwèké","pérang","pratélan"],"or":["forecast","kangri","developmen","dinates","forecasts","dubey","bengaluru","autho","rspec","sheet","kgroun","gann","kgr","intere","backgroun","ronamraj","ideshow","ckground","celestial","partmen","nment","pronunciati","clar","axial","biogra","orissa","ackground","training","gic","artmen","mandira","sect","ucherr","ific","dro","beco","backgrou","pædi","intercity","karna","pme","angalor","himacha","dicine","storati","thinking","slidesh","etrie","ackg","slidesho"],"fi":["mmäin","mmäine","olivat","yhdysval","öhemmin","jaits","ensimmäise","ensimmäin","vuodesta","lisäksi","ensimmäis","simmäi","mäinen","dysv","seuraa","kuitenkin","jälkeen","mmäis","yhdys","lähteet","nsimmäis","kseen","tettiin","jolloin","dysva","jonka","ensimm","yhdysva","myöhemmi","ensimmäi","simmä","öhemmi","myöhemmin","uraav","mmäi","hdysva","mmäise","vuodest","simmäinen","ilpai","yhdy","kanssa","nsimm","ilpail","mäise","öhemm","etään","vuosina","jaitse","sijaits"],"ga":["timpeall","riceá","iceá","aontaithe","éanam","íonn","scéa","timpea","íreolaío","dhéanamh","scéal","dhéana","dhéanam","chtaí","ánach","féidir","aíoch","reolaí","aíocht","ríomh","dhéa","tháinig","bíonn","fuair","héanam","tíreolaíocht","laío","tugtar","iriceá","bliain","bhí","bhliain","héanamh","tíreo","haois","bhfuil","dhéan","chéad","aíonn","aíon","aíoc","aontaith","impeal","céad","áirithe","úsáid","déana","raibh","stáit","mhí"],"is":["þjóð","darís","isráðherr","því","kvikmyn","rðlaun","bandarís","darískur","þegar","onungur","það","fræði","andarís","þeirra","fædd","kmynd","tofnað","arísk","ikmynd","einnig","tofna","darísku","bandarí","ónli","fræð","tónlist","rithö","ðher","ráðherr","skáld","bandarískur","júlí","stæð","rsætisráðhe","félagið","síðar","þátt","rithöfun","hefur","þeir","stofnað","yfir","þau","ofnað","dæmi","þá","meðal","stofna","þó","júní"],"no":["referanse","brukes","fotballspiller","hoppet","utdannet","utgitt","opprinnelig","tbredelse","oppgave","utviklet","begynne","ktigste","spesielt","kjente","regjeringen","sykkel","utøver","utdel","kalles","feranser","utbredelse","egynne","eltake","opprettet","tomten","oppene","tgivelse","referanser","innbyggere","sykehus","sesongen","opprinnelige","utenfor","gynnels","spilte","allspillere","nnenfo","skjell","fotballspillere","norgesmesterskap","fotballspille","opprinne","øyene","viktigste","inntekter","begynte","otballspill","jeringe","oppdage","egynn"],"ht":["konsève","fòme","fòm","plizyè","oubyen","pèsonalit","jewografik","ameriken","radiksy","itilize","relijyon","jewogr","tizasy","enpòtan","antye","dyodesimal","genyen","vwayaje","pèsonalite","panyòl","dakò","kapitèn","èsonal","reyalizatris","segondè","avèk","dezyèm","depatma","powèt","pwod","tradiksyon","ayisyen","kòmanse","natirèl","wondism","pèsonali","jewografi","relijy","relijyo","gwoup","nivès","depatman","jewograf","wografi","jewogra","ewogr","sitiye","deyò","wograf","ewografi"],"lb":["gestuer","nséisch","gestuerwe","schrëftsteller","lëtzebuergesche","ëtzebuergeschen","lëtzebuergesc","gebuer","hauspille","schrëft","estuerwen","nséisc","auspille","ergesche","uerwen","ëtzebuerges","franséisch","zebuergesc","nséische","ranséisch","tzebuergesc","uspiller","hauspill","buergesc","schauspille","äitsche","schrëfts","lëtzebuergeschen","schauspiller","franséische","chrëft","auspiller","gestue","chrëfts","séische","ebuergesch","ebuerges","stuerwe","anséisc","uspille","gëtt","éische","gestuerwen","evenementer","stuerwen","zebuergesch","séisch","auspill","uergesche","estuerwe"],"sco":["reetish","itbawer","kintra","politeec","kinrick","eenister","commonties","eetish","itbawe","roushie","ommonti","pairty","breetish","leetit","thwast","atween","heichest","daiths","oliteecian","pairt","politeecian","auncient","meenister","unitit","airtins","muisic","tbawer","fitbawe","naitional","teecia","fitbawer","breeti","othwast","whaur","fowk","fitbaw","meeniste","naitiona","lairge","mairch","commont","liteec","seicont","anerlie","fitba","fitb","foondit","meenist","lunnon","acause"],"uz":["ashkil","toshke","sifatida","aoliya","mavjud","uvch","viloy","fatida","faoliyat","lashgan","boshla","ishlab","oshken","liyati","viloyat","xizmat","faoliy","davlat","faoli","boshqa","ashgan","boshq","toshkent","lashga","dabiyot","joyla","sifati","viloyati","joyl","foyda","sarlari","loyat","shoir","nashr","qilgan","ishlar","ishga","iloyat","shlari","sifatid","foydalan","tashkil","adabiyot","foyd","aoliy","shgan","ifatid","dabiyo","faoliyati","ifatida"],"so":["koonfu","dawlad","ooyink","waqooyi","icmaal","adkii","deegaa","leedahay","ilaab","caasi","aliyeed","aduunka","dhex","taariikh","yaqaan","eeyay","koonf","deegaan","asimad","leedah","magaalada","magaalad","waqoo","edahay","agaalad","riikh","dhexe","yihiin","anadk","wadanka","aliyee","waxay","koonfur","caasim","bilaab","wuxu","waxaan","caasima","wuxuu","ahayd","waxaa","dadka","aariikh","agaalada","oonfur","degmada","eedah","gaalad","waqooy","waxaana"],"co":["nsirvaz","unghj","qualchì","pricisa","dinù","quiddu","arechji","ghji","hjungh","ghjuv","rechj","giaddu","rifarenzi","picciolu","aghjun","annantu","ghjuvan","oghje","incù","hjunghj","aghju","ghjunsani","ghjun","cunsirvazi","echji","irvazi","parechji","ghjuva","pigliendu","eghja","righj","cunsirvazio","nghj","ghjungh","hjoni","righjoni","aghj","cuntinendu","travagliu","arechj","pruver","cunsir","ghjon","cunsirvaz","rifarenz","hjunt","chjesa","righjon","spergugliati","naghj"],"fa":["تورس","خشونت","دلخواه","داشتند","بافر","آنها","ارآموز","آرمن","بلغارستان","بشود","دستاورد","رهبران","آرارات","هدفمند","آرتور","خاطره","ذراندن","فروسرخ","نترباس","آنه","بهبود","سروده","آورده","شود","دهنده","وخامت","احاطه","هستند","فتارها","رفتند","بتواند","ندارند","آوران","ذارند","هاست","خواهد","ندادن","بالاد","دارند","سربازان","آنجلس","خلبان","هنان","رتوها","دستمال","لهستان","نامش","مأموران","مستغلات","حداقل"],"yo":["àìjíríà","yìí","ìtàn","ètò","nítorí","àwo","ìjírí","ìlú","èyí","nàìjír","nàìjíríà","àìjírí","ìjíríà","yóò","nìkan","pàtàkì","olórí","mìíràn","òní","ìwé","gbègb","ìpín","ríà","ìkan","ìlà","àrin","bìnr","nínú","nígbà","àwon","ìjír","jíríà","íríà","jáde","agbègbè","nàìjírí","nípa","méjì","àìjír","nǹkan","obìnrin","tàbí","oòrùn","ìgbà","náà","àdúgbò","kejì","lórí","ìdí","bákan"],"be":["гк","ат","нду","usi","слова","рэс","мас","нва","rt","адр","мд","бду","аат","кс","aut","мус","вак","уп","пес","olo","рва","рга","ас","бэ","арп","thor","гну","os","ваг","спа","сат","пат","ва","кз","ygim","кб","ду","ката","уа","int","азб","аг","нна","fl","ата","мак","цт","рв","ус","дуа"],"rw":["ushyirwa","majyaruguru","ubucur","yabereye","majyaru","cyicir","kwicwa","jyarugur","wemewe","ubucuruzi","ishoramari","zibaho","amarushanwa","imiyoboro","imibare","ishora","majyepfo","batuye","yakomeje","ativiy","baturarwa","yepfo","amagambo","gukorwa","bufite","mahirwe","intungamubiri","iyoboro","cyitwa","habamo","cyiciro","busaba","ubuyobozi","imyunyu","ibijy","nibwo","majye","ubucuruz","kutabo","cyongerez","ubilig","impamvu","iburasirazuba","rakome","ashanya","ashyiraho","yahawe","isigaye","cyong","rwera"],"af":["tydens","sterftes","blisee","ublisee","enskaplik","skryf","orspronkl","orwinning","amptelik","rskeie","verskeie","slegs","angebie","gestig","aangetref","sifieke","rigting","oorspronklik","gevind","omgekom","ustralies","angetre","erskeie","besoek","aangebied","wetenskaplike","getref","publiseer","wetenskap","koninkryk","tigerj","tralies","renjae","ampteli","kanadese","russies","australiese","geleë","terwyl","voorsitter","hierdie","stigting","gewoonlik","hoofsaaklik","leë","vervaardige","vervaardiger","stigtin","skakels","sogenaamde"],"ne":["athemati","ewerk","introducti","yclopædia","biolo","bacteri","translati","stud","cumen","blockquot","docum","pyramids","ucati","ster","elopment","faroe","vernmen","chnology","check","eenl","sanka","rae","opula","switzerla","cause","nts","ertil","cyc","archives","speck","itutio","riti","bam","reenla","orl","thni","communi","anguages","introduct","rld","matolog","prote","hrö","organizat","ilosop","greenl","irt","mic","enomic","nslatio"],"haw":["pahuhopu","akauh","omohana","nakauha","uhopu","nakauh","kūlanakauh","makuahin","aupuni","auhale","ukalani","makuahine","ūlanakauh","kekahi","aīa","huhop","uhop","kūlanakauhal","kapikala","akauha","pahuhop","pukalan","kūlana","ahuho","ōlelo","omohan","makuah","kapikal","pukalani","ahuhopu","kauhale","komoha","kūlanakauha","kēia","komohan","ahuhop","kūlanakauhale","ūlanakauhal","kūmole","kūlanaka","anakau","meakanu","akuahin","makahiki","lanakau","wokel","pukala","ūlana","komohana","lanakauhal"],"gu":["ftp","vlan","bhag","scove","atm","eom","athem","ferences","ahavi","jarat","believe","spe","rther","geomet","gio","serv","gujara","ations","therapy","lock","sheets","mhz","ctiv","properties","edie","gujar","perih","prop","floral","transition","mgm","exist","beautifu","ujara","spec","athemat","paheli","opertie","feature","changi","whether","geometry","watch","jain","rfc","chennai","pect","rine","gta","ered"],"kk":["жб","чкп","му","шеб","бо","като","сом","рь","он","мб","др","зи","ав","абб","туралы","хр","ку","ылмысты","ауданы","ео","тп","nk","бол","кп","ак","ба","лы","тб","associ","зб","топ","ст","им","шет","инал","хт","xt","рас","нек","ануа","мн","хик","пк","assw","тж","рр","жыл","iation","ылым","шр"],"ka":["waste","olut","atoi","vrubel","frame","alexand","yclopæ","delweis","philos","informa","becker","cilician","cathol","macmillan","chges","losophy","artillery","hikr","rave","scenes","mirrors","harlem","ædia","excuse","tanni","tga","arménie","lishing","carmen","eutsc","ogni","decres","anon","dungan","rosenberg","graphemica","zhang","verythin","azerbaijan","promi","ure","hold","ikimed","brot","festival","logic","blishi","brooks","aphemica","thames"],"uk":["сб","нш","во","упа","кну","сес","фм","пун","заз","кту","internationaux","yard","онк","destiny","вр","бч","мма","ternazio","ват","cerwa","ону","ronic","рев","го","озп","апл","draws","вд","уга","halle","tp","стов","ато","сбу","ода","бате","ова","мон","гес","кма","уку","trasbourg","рем","oub","леп","амс","ernationaux","тов","дкр","зво"],"ru":["нко","да","узи","оги","уво","па","ека","оз","motors","ollywood","пш","пло","ики","мми","сф","мот","см","ржд","сар","чм","ер","вп","мбр","тко","фаи","ога","гас","hessga","ебол","рудн","упи","гд","ив","ublishers","пр","миа","тц","alogue","prague","zons","dq","дот","ндс","пна","дск","по","ervice","sterling","атр","вк"],"et":["erineva","kasutab","värvid","sajand","keisririigi","pärast","näiteks","liideti","suubub","sajandi","nõuk","viima","päeva","odusli","ettevõte","usvaheli","mõju","koguni","kogud","rineva","svaheli","mööda","hitatud","põhjas","pärand","kogude","nõukogu","kunstis","vaheli","irjutatu","jaanuar","kirjutatu","nõukog","pannaks","kohaselt","sündi","andmeid","kasvab","ukogud","mõiste","ehitust","meedia","õukogu","õuko","järel","õpet","agajärje","ärast","tõttu","nimetat"],"xh":["ugqi","xesha","tshintsh","gqibel","eziliqela","ininzi","isety","isetye","amaxesha","axesha","zimbini","iindawo","umzeke","ininz","setyenzi","phantsi","ezininz","umntu","umzek","isoloko","kwaye","baninzi","umzekel","kuthetha","okokuba","zininzi","yenzis","amajoni","amapolisa","ngurhulumente","ixeko","ithetha","isetyenzis","etyenz","aninzi","ezininzi","umzekelo","okokub","omnye","isolok","ekuthiwa","iqela","kumhla","dolophu","ninzi","ixesha","amaxes","doloph","uthetha","tshint"],"lv":["ūsdien","eiropas","izmanto","aptuven","galven","gadsimta","adsimt","dienvi","vēsture","iemēra","espē","ptuveni","ļoti","uzskat","veidot","ētisk","aptuveni","iemēr","aptuve","izmant","valsts","gadsimt","gadsi","skatīt","mūsdie","tomēr","mūsd","emēr","kļuva","ījum","piemē","dsimt","atrodas","iespēja","attiec","emēram","dzīvo","bieži","adsimta","dienvid","piemēram","ērij","ņēm","ēram","līdz","noteikt","ētas","ēšan","pasaules","vēro"],"tr":["etmiştir","değiş","tmişt","rilmiştir","olmuşt","günüm","oluştu","gurbetçi","nedeniyl","belir","geliştir","mişti","umlular","lmuştu","günümüz","oluşu","luştu","gurbet","futbolcu","muştu","öğren","muştur","kariyerine","doğumlul","lmuştur","sporcular","uştur","madalya","diğer","çiml","lmuşt","nedeni","oğumlul","doğumlular","sahip","lmişti","mlular","dönem","kulüp","doğumlu","luştur","birçok","dönemi","oğumlu","çimler","utbolcular","nedeniyle","dönemde","uygul","oluştur"],"ms":["darinya","pelbagai","ditubuhkan","erkahwin","terpaks","enuhn","dibahagikan","erketu","pengerusi","ngingati","berkahwin","akuatik","rumpai","berkhidmat","terutamanya","apatah","enyerta","bawahnya","ekornya","berlalu","ijumpai","kesemua","kenalan","selepas","nubuha","jurute","rcubaa","engeru","dilancarkan","iaitu","berbandi","dibina","ilancarka","menubuhkan","menyuar","diatasi","penubu","penyesuaian","ketegang","amfibia","menyertai","berbeza","ngerusi","haiwan","keguna","letupan","gerusi","lazimnya","kegunaan","sotong"],"fr":["footballeur","eprésente","troisiè","incipalemen","défait","réalisation","lorsque","alleur","néerland","fonctions","ponaise","endémiq","réalisé","conservées","néanm","successivement","troisièm","entraîn","réalisati","laquelle","écurie","chelem","suivant","augmentation","décrit","desservi","connait","placés","annuelle","entraî","évolue","constitué","récemm","déclaré","déclare","cénario","baisse","missionnaire","déplacé","ensuite","lorsqu","soutien","restée","cependa","équipes","remporte","suivante","confié","ffondreme","scénario"],"lt":["šioje","lietuvoje","onalinis","teritorijoje","trijų","gydymo","nyderlan","stadionas","rytų","asamblėja","stemos","aikoma","sistemos","ojų","salų","galėjo","gyvenvie","ybių","ištekl","karų","kunigaikšty","ndijos","tijos","dydis","naudoj","sovietų","kalbos","vakarų","yvenv","dažnai","gyventoj","asaulio","zijos","onijos","sudaro","pasaulio","kurių","rūmai","ėse","vadovas","kitų","įsikū","gamyba","vidinė","didėja","tūros","valdy","didelis","lenkų","įvair"],"vi":["trình","nghiên","nhóm","giúp","kích","ngư","toán","mình","trưng","giáo","này","loài","chơi","giành","khác","giáp","kính","sư","tiêu","nơi","tương","vào","lĩnh","khách","cơ","tính","doanh","giám","chương","thơ","trùng","nhánh","rình","nông","vươn","ngoài","xây","phép","trúc","hơi","xanh","cánh","chuyên","tiên","quý","vương","cách","cư","như","nhưng"],"de":["benfall","plätze","befinden","ebenfal","erreichte","beispielsweis","lspiel","weiteren","bernahm","olympischen","nzelnachwei","erneut","musste","darunter","zelnachwe","beispielsweise","kehrte","zelnachweis","folgenden","teilnehmer","ltmeistersc","benfa","eingesetz","erheiratet","ebenfa","ielswei","absolvierte","besucht","ispiels","kleinen","ausgezeichnet","gehörte","mannschaften","einzelnachweise","übernahm","rzielte","eiratete","aufgrund","spielsw","lnachwe","führte","araufhin","veröffentlicht","amerikanischen","gelöst","spielte","olvierte","ebenfalls","studierte","pielze"],"st":["letsoa","bophirimela","motsemoholo","fumaneha","olokil","pahetsen","liphetoho","lipoloti","tsejwang","mashome","enyelle","lipolotiki","fuoa","tummeng","bophirimel","eholimo","kenyeletsa","olokeh","epahet","tshwaetso","tsemoh","arohane","tšebeliso","moahloli","moholo","lengolo","bophirime","leqephe","pahets","setjha","phatlalatsa","boholo","phahameng","qobell","fetohe","kenyelletsa","bolokil","moruo","kenyelle","ahetse","univesithing","thalets","leholimo","nepahetseng","lahetse","dilemo","kenyellet","waetso","ipakeng","khethiloe"],"tt":["urnaşq","şähäre","zmät","bolğar","cibä","keşelär","böyek","çelege","älif","berençe","ğäskär","lärene","könyaq","yäşäw","äsendä","öleşe","kimiä","böten","urnaşqan","patşa","qädär","bularaq","tözel","soñraq","yöz","äskä","rnaşqa","şähär","mäğl","itelä","mäskäw","rneñ","naşqan","äklär","xezmät","kçel","äkç","dönya","belän","töş","qatnaşa","relgän","xäzer","räsäy","naşqa","töze","torğan","iädä","nigez","äkçel"],"sw":["patikana","serikali","askazi","utawala","tarehe","kutoka","uingereza","upande","askazin","ilikuw","nawez","kutokana","kiarabu","kimatai","mjini","kutumia","makubwa","mbalimbali","iingere","masharik","kimataifa","mataifa","kiingereza","nyingine","msingi","muhimu","mbalimb","ilikuwa","marekani","uingere","kaskazini","uchumi","duniani","alikuwa","zilizo","maeneo","skazini","wengine","magharib","mashari","pekee","kimataif","sehemu","kaskazi","ulikuwa","hivyo","pamoja","mashariki","tofauti","magharibi"],"my":["ternative","isio","haw","signals","bmt","maung","taungth","kone","rites","squash","poke","markus","zhejiang","nypd","ampere","vuitton","spleen","kyar","siripo","pact","presenc","inja","yae","business","mpr","mawlamyine","kutkai","ngb","nsn","llboa","thar","kyauk","tds","gway","ductiv","psychedelic","pi","celine","lternati","ocka","longja","hoover","offical","urre","mir","honky","kyun","aun","ctronic","luk"],"sq":["dryshm","ndrys","drysh","kohë","kombëta","ërgji","përfa","ëhe","ërdor","yshm","kryes","ndrysh","jetë","përg","përdo","këtë","kanë","perënd","ryeso","ëtare","botë","ëror","krye","shteti","ësor","tëror","ndërsa","ërfa","zgjed","bëtar","përdor","lidhje","gjatë","tëro","gjuhë","perën","erënd","ërgj","pjesë","shtetit","kryesor","ombëtar","ësis","mbëtar","janë","ombëta","sisht","zgjedh","ndryshm","përgjith"],"su":["dipaké","unakeu","sahiji","angrupa","atkeun","nyieun","kabéh","ungtik","panungtung","mangrup","marcapada","ngagunakeun","ngulik","umutkeu","kaasup","sababar","sababaraha","dumasar","leunge","désain","gunake","séjén","kaayaan","gunakeun","mangrupa","angtukeun","sakabéh","saterusna","agunakeun","ilahar","tutuwuha","paraméter","saterusn","lungt","tkeun","spréad","ngeunaa","arcapa","sababarah","ababaraha","probabiliti","numutkeun","ngeunaan","séjénna","deukeut","nangtukeun","élmu","gaguna","geunaa","saperti"],"fy":["dútsk","kriuwer","erlânsk","nederlânsk","skriuwer","ferstoarn","estjoe","keuns","ikaansk","rjocht","nederlân","keunstskilder","eunstskild","earste","bestjoer","eunstskil","derlâ","nederlâns","kriuwe","bestjoe","tjoerde","skriu","keunst","amerikaansk","ederlân","derlân","skriuw","ûnde","jochte","sjoern","rjochte","stoarn","sjoerna","skriuwe","ûnder","stjoerd","ederlâ","nederlâ","erikaansk","sjonger","feriene","ferstoar","ederlânsk","riuw","frânsk","steaten","riuwer","ânske","skriuwster","tskilder"],"gl":["exercicio","bxe","despois","coñecido","xeito","proximidades","algúns","maioría","esenvólve","poñer","poboa","súas","persoas","inclúen","circuíto","podería","oñeci","oximida","uncións","alguén","lonxitude","ligazón","xect","igazón","lonxitu","algunhas","mellorou","cabalos","ligazó","imaxes","xecto","cións","conxunto","esenvolveme","proxec","aínda","xeral","obxect","xermánica","obxectiv","moitas","onxitud","ñeci","calquera","coñécese","ademais","véxase","moitos","coñe","desenvolvemento"],"pt":["gaçõe","participou","escreveu","dezembro","próprio","rticipaç","populaçã","recebeu","ezembro","espaço","nasceu","morreu","devido","março","começou","população","lançado","ições","possui","opulaçã","somente","igações","ligaçõ","estadual","exército","britân","stação","alemão","gações","ligaçõe","relação","prêmio","ligações","medalha","ficou","canção","gaçõ","eleições","origem","geralmen","igaçõ","maçã","campeão","igaçõe","articipaç","algumas","estação","missão","produção","itâni"],"en":["ubseque","addition","proximatel","signed","tablished","initially","formed","established","ccording","whites","llowin","dfathe","reserved","continues","aunched","throughou","rebuilt","onsibl","llowi","hospitals","overall","counted","launched","esignation","playing","ximate","lengthy","membership","successful","mploye","operates","ditionally","dditio","wrote","served","equentl","pitchers","lowi","sthumous","omewhat","rected","tablishe","omprise","onvicted","complexes","appropriate","frozen","cluded","erted","mature"],"hy":["sul","enciana","loneos","exhibition","ovat","filmf","allied","ecembe","pill","ancesc","eut","orie","arli","picture","vt","nct","offic","demand","lab","arqueo","gesellschaft","ilm","heal","sheila","olle","giorno","ounta","nuestro","anbul","tbol","meizu","comet","portbox","piano","assessment","vent","mmun","rsssf","melb","sap","trole","niversit","ua","croatian","kunst","nstitute","riders","ntertainme","guard","revue"],"mi":["kaihaut","anatang","raupar","tairangika","kīngi","whakatūria","heinneman","tohutoro","ōngon","katūr","whakatū","ahiterei","tekau","kōrero","whinirana","ohutor","tirohia","tawhito","piripīn","tereiria","tuarima","tohuto","pōtatau","ohutoro","hutoro","kaihautū","ngahere","ohuto","tēnei","wehenga","nōwei","moutere","ereiria","ētahi","hakihea","aihaut","ereiri","whiri","wairau","hutup","whero","haratua","tohutor","tētahi","whakapa","hautū","ēnei","kainoho","ahitere","ahitereiria"],"bn":["librari","sons","reso","lion","hours","ial","estimate","blishe","medal","philosop","nmenta","stephen","ponsors","te","tr","ishe","alexander","epics","sans","intern","arlington","blished","government","computer","illa","rd","gions","elegans","imm","ovemb","manual","istics","lig","kirk","ecember","ronmental","samten","shrines","ick","ine","spread","archi","cee","hole","richard","inis","ph","reami","ittee","houses"],"cy":["cyfans","genedigaethau","yddw","rlywydd","wolaeth","chwarae","unedig","mehefin","gaeth","wrth","gwlad","wolae","yfanso","awdur","gwleidyd","marwolaethau","gwleidydd","flwy","gorff","efyd","roedd","tachwedd","rwydd","lunyd","weinidog","yfans","ithiau","wleidyd","bardd","hydref","marwolaet","genedigaetha","arluny","ffilm","arwolaet","brenin","gwleid","lywydd","gorffen","eithiau","nofel","gorffennaf","arlunydd","arlunyd","rlunyd","iaet","rwolaeth","iaeth","nedigae","nedigaet"],"pl":["których","rzypi","zypisy","następnie","następn","nętrzne","tępnie","udział","iśc","później","nętrzn","stępnie","gatunk","został","zdobył","stępu","zypi","rzyska","miejsce","najduj","znajdu","została","które","przypis","liści","rzypisy","zewnętrzne","nęt","ewnętrzn","ętrzn","zewnętr","nastę","tępn","ewnęt","rodziny","grał","ewnętr","zewnęt","igrzysk","wnę","przypi","zypis","tępni","znajduje","nętr","również","wnęt","następ","przypisy","ypisy"],"pa":["ives","readers","desai","mathematics","ished","echnolog","lenda","tefl","vanessa","kahani","emin","engineerin","ts","cores","mansa","pravda","sujata","riley","echno","kumaon","cognitiv","ansistor","howe","ily","differen","austra","nreview","punjab","ody","escri","hilosopher","ebel","uce","supporting","cet","cent","georgy","anic","commu","stru","rpca","jenning","ineer","nderstandi","vashem","davut","erforming","kanu","shiv","evening"],"mg":["indrindr","rehefa","njeniera","anarana","antsar","mpanoratra","voalohany","mpanorat","indrind","jeniera","toeran","mpanao","hatrami","ampiasa","hatramin","mponina","irenena","oronants","indrindra","lehibe","alohany","voaloha","mpahay","pilalao","senariô","ronantsar","voaloh","drindr","amorona","varatr","avaratr","injeniera","oalohan","frantsa","mpilalao","mpilal","irenen","firenena","onantsar","avaratra","varatra","aritra","nombok","noratr","renena","baolina","nomboka","noratra","mpanor","voalo"],"ckb":["باشوور","ناودار","وردستاندا","اتوو","نووس","نووسرا","سنوو","تووشبوون","شارستان","خواز","خراو","ردستاند","خستن","ناوبراو","ناونرا","ردستاندا","اشوور","رابوو","ردوو","نوور","ناسراو","ناسر","دواجار","ناسرا","خزم","دواتر","واندز","نووسراو","سنوور","مبوون","ونجاو","انرا","ووبا","زانست","سروشت","شتوو","ووبار","ناسراون","وتوو","داخراو","بووندا","مووش","ناتوانن","جووت","ووخا","تووش","ناودا","ربوو","نجاو","سادق"],"ml":["achin","nsiv","arden","ronme","umbellata","knuth","ewed","oord","perf","ronmenta","erflie","australian","fifty","ingi","dioscorea","advocate","ssociation","dyck","bellflower","marko","cao","fedtsch","respiratory","naja","wallace","bantam","ites","inat","launch","opul","conflicts","dixon","pital","gardens","performed","idere","models","department","mbi","officia","layalam","environment","trivand","regard","mep","rivillius","thirattu","nterview","niz","hemist"],"id":["pekerjaan","dirasakan","keseluru","mengungkapkan","berbasis","berkata","membangun","menjadi","diwariskan","keuangan","memungkinkan","mendukung","berangka","emulia","berangkat","suatu","sengit","memohon","membedakan","khalayak","miliki","tidak","memungkink","ngatakan","bertenaga","gembala","ercata","karyanya","emilik","memiliki","rjumlah","memili","keuan","berbas","sejumlah","berubah","seluler","ketakutan","eseluruhan","erjuml","mengungk","berbeda","emiliki","berisi","terkait","isolasi","seluruh","mencat","mengudara","rcatat"],"mr":["pakistan","maint","sacrum","cation","raphael","typi","rog","glass","translations","aeology","shankar","vkjksg","pup","doct","ubrama","grouped","truck","availa","geometr","ixtures","inancia","lution","luffa","esource","rick","rtme","ndust","arning","stábile","nvir","breeds","huáng","yoga","ground","artu","jec","rogra","trams","ranade","commercial","added","madras","authorized","typica","omput","ule","lence","func","grid","respect"],"tg":["км","ikiwand","нбо","шма","кч","аълу","мл","улф","идм","бораи","ълу","уфа","мар","уд","ниш","фе","киш","фас","кг","анд","ким","wikiw","cyclowiki","аф","сут","wikiwand","маълу","ълум","ка","намояндагон","гон","аки","аи","дон","сан","ммо","дат","tádzsikisztán","ад","маълум","рад","arof","мои","ред","мояндаг","ин","developer","мадрасаи","дар","хшб"],"eu":["erregea","maiatzaren","antziak","ntziako","rantziako","idazlea","dazlea","idazl","garren","alemaniar","emaniar","rantziak","jaio","martxo","espainiar","tziak","frantziar","kanpo","abenduaren","otsaila","urtarr","ntziak","handia","idazle","irailaren","margolaria","zientz","zlea","frantziako","zituen","artxo","martxoaren","italiar","uzkoar","ailare","tikaria","uztailaren","izkait","argolaria","kirol","bizkaitar","frantzia","estekak","zuten","tsailare","ratzaile","politikaria","martxoa","gazte","otsailaren"],"zu":["ngxenye","fundazw","mithomb","uleyisitata","inhlanz","tshonal","ezindaw","zindle","undazw","fundazwe","ukubhe","amaningi","okuningi","ifundazw","mithom","usihlw","zokuzijabulis","amalok","lwelanga","zomphak","gxen","sifundaz","setshenziselwa","ntshonala","kwalokho","inqubo","ngxen","eziningi","amakhasi","izimpawu","langoth","imithomb","ikwisifundaz","uhlangothi","intshonalanga","nokuthi","baphilayo","isishiy","imithombo","gcindezi","imithom","izilimi","wesifunda","kuyinto","uleyisit","ukuxhumana","ifundaz","isites","sifundazw","kubheka"],"sn":["hirev","mutauro","dzimwe","hireva","owanikw","oshandi","achirev","vanoti","kubva","nzvim","nowanikwa","zvinoreva","musoro","noshan","zvichi","nzvi","inonzi","shandisw","vachireva","mitauro","zvimbo","zvinore","rokuti","yebantu","noreva","zvaka","owanikwa","inowanikwa","achireva","chirev","wechi","mitaur","vinoreva","inorev","vimbo","shandis","shandiswa","zvich","kutau","vachire","zvinor","vamwe","inonz","inoreva","nzvimbo","zvinorev","kubv","nowanikw","mhuka","noshand"],"nl":["veneens","uitgebrac","bedrijf","eindigd","eindigde","indigd","eveneens","leeftijd","daarbij","voornamelijk","uiteindelijk","spoorlijn","oorspronkelijk","belangrijke","bruikt","oorlij","eftij","krijgt","geplaatst","gebruikt","aarnaast","ankrij","belangrijk","voorzien","ftijd","beschrijv","evenee","iteindelij","eplaatst","aardoor","wedstrijd","voornamelij","tijdens","strijden","chrijvin","tekende","oornamelijk","jgelegen","daarnaas","jgeleg","opgericht","wedstrijde","visies","zouden","ijving","frankrijk","waarbij","ebruikt","abijgeleg","gevonden"],"mn":["орм","аваа","байна","бусад","хоёр","багш","риулсан","expla","klaus","утга","бор","от","theor","дахь","roduct","мм","theorie","улсын","eaching","эдий","эгм","ста","илл","хаан","iquot","мэди","заан","ээллий","дм","хя","ард","нь","дис","мэ","сургуу","монгол","ажил","лба","мпиа","дархан","рам","лта","гуу","амд","жилийн","апи","цэргийн","нэг","дт","отго"],"kn":["bjp","usd","chilli","medicin","tamil","haudhary","thrust","chf","moves","chrö","presentatio","goel","divide","atiśa","voti","bombay","gmbh","clause","appeals","redox","wikiped","bdd","mysore","alysi","gslv","clayey","acetic","tmz","herrick","nclu","eferenc","graphy","scr","fisher","arm","rn","gerard","nstitut","ghz","dec","lcutt","minister","cathe","lop","bmi","rganizatio","sharks","issio","indust","hanoi"],"ug":["ىقتى","قىل","ىساد","دىك","ىمى","جايلا","تىن","ىنى","بىر","سىي","لىد","ىغا","ىدى","دىكى","جايل","لىنى","ىسا","ىس","مىل","ىلا","قتىس","ارلىق","ىلل","ىقى","تىك","لىقى","تىس","قىلى","بولسا","لىق","لىر","ىتى","ىقتىس","ىغان","ىقتىساد","تىساد","يىلى","دىن","لىك","ىكى","يىل","ىستا","ىشى","ىيى","ىست","ىما","رىد","ىرى","ىسى","لىن"],"ky":["птери","котор","арб","бак","тт","китепт","ту","итеп","китеп","компьютер","жак","жок","эки","ргызды","улуу","итепте","жогорку","тартип","жылдар","акш","жана","ошондой","nschr","мле","ги","ери","туу","тери","теп","епте","ептер","турган","кыргыз","тептери","тараган","тарых","кму","тер","бюджет","пте","епт","ките","ргызс","канат","аак","куу","буу","тар","жолу","ксат"],"ro":["astfel","referințe","semenea","sfârșitul","nființ","voluț","cție","început","asemenea","schimb","funcțion","emenea","erințe","pând","potriv","asemene","arți","ceputu","faptul","dezvolta","octombr","acesta","colului","internaț","înfii","fârș","secolului","înfiin","ctombrie","ârș","ființ","iinț","uția","octombri","oameni","uncțio","șitu","târziu","istemul","ctombr","tiinț","ezvol","precum","nașteri","ârși","octombrie","domeniul","secolul","eputul","rșitu"],"hr":["blažene","straliju","sudjelov","boravak","glazba","groblju","brzina","lješ","odlazak","napasti","trnae","primjerice","boriti","bitci","glazbi","temeljem","odražava","trebale","ovlasti","župana","vesna","razdoblje","okolnim","udjelov","pravima","bjasni","ednadž","olovoz","pjesama","tinjak","kolovoza","kolovo","progona","primjene","zavjeta","prošlog","poticaj","dobara","puštena","dnadžb","bojnik","istaknuo","prosinca","ustaški","zlikovat","mediji","zavjet","namjena","mještani","kojemu"],"ig":["gbasara","dabere","nwèrè","gbanwe","okpuru","meriri","njirimara","malitere","gwuregwu","mpagha","egwuregwu","chineke","onyonyo","bidoro","nkeji","nweta","okike","màkà","mahadum","ukwuu","ubochi","nwetara","nwunye","okpara","gwure","mmemme","egwuregw","dozie","aghara","wuregwu","nwaanyi","nhazi","itoolu","nwekwara","omumu","nwanne","enwere","nrite","mpaghara","omenala","mgbanwe","egwure","akwukwo","leekwa","nwanyi","dummir","gwuregw","nnukwu","nsogbu","mwakpo"],"eo":["ksteraj","lkovr","irkaŭ","kelkaj","ligiloj","alkovr","eksteraj","oĝantoj","komunum","loĝantoj","iĝi","iĝis","ekstera","ĝant","kovrit","alkovri","loĝ","oĝan","ankaŭ","ĝin","oĝanto","oĝant","oĝa","aĝa","askiĝi","ferenco","ĝas","viĝa","ovrita","ĝan","metroj","lkovri","ovrit","malkovr","ricevis","eferenco","kiĝis","lernejo","oviĝ","kiĝi","encoj","naskiĝi","naskiĝis","malkovri","komunumo","omunumo","loĝa","askiĝis","loĝanto","referencoj"],"sd":["هريون","نموني","تعلقي","ناهه","يونانين","حيدرآبا","قلمي","ذاريا","حيدرآ","آزادي","هندن","واسطو","ينهن","هندستان","سلسلي","يدرآباد","آهي","حيدرآباد","نعرا","هوندي","روزاني","برطانيا","قدرتي","حيرت","نهنجي","حيثيت","ندرهين","ايتري","سمجهي","هندست","ديوتا","زرخيز","تنهن","ذريعي","يدرآبا","ورثي","رسالن","وسايو","رهندا","نهنجو","ناهي","منهن","حيدرآب","بابت","جنهن","مقامي","عرصي","سمجهيا","شاهه","انهن"]} \ No newline at end of file diff --git a/data/training/dbtools/getbestwords.sql b/data/training/dbtools/getbestwords.sql index 4c98082..2509d4d 100644 --- a/data/training/dbtools/getbestwords.sql +++ b/data/training/dbtools/getbestwords.sql @@ -2,4 +2,8 @@ CREATE temporary TABLE tmptable AS SELECT DISTINCT(language) from WordScore; -select tmptable.language from tmptable; + select tmptable.language, (select w.word from + WordScore as w where w.language = tmptable.language + group by w.RootWord order by w.score desc limit 30 + + ) as word from tmptable; diff --git a/data/training/schema.sql b/data/training/schema.sql index c505db7..b87c0e9 100644 --- a/data/training/schema.sql +++ b/data/training/schema.sql @@ -129,16 +129,16 @@ Create Table WordCountTable( ); Create Table WordScore( - word string not null, + Word string not null, RootWord string not null, - language string not null, - occurrence int not null, - occurrencePerSample float not null, - isolationPercentage float not null, - score float not null, - stage string not null, - samples int not null, - unique(word) + Language string not null, + Occurrence int not null, + OccurrencePerSample float not null, + IsolationPercentage float not null, + Score float not null, + Stage string not null, + Samples int not null, + unique(Word) ); diff --git a/nim/main b/nim/main new file mode 100755 index 0000000..a8daeed Binary files /dev/null and b/nim/main differ diff --git a/nim/main.nim b/nim/main.nim index 0e1b80b..329e09a 100644 --- a/nim/main.nim +++ b/nim/main.nim @@ -148,7 +148,7 @@ proc neighborDistance(a : Table[Rune, float], b : Table[Rune, float]) : float = resultBuffer[i] = (cubic(abs(a[char]-distance))) return resultBuffer.foldl(a+b) -proc doThing*(comparisonLangs : seq[string], sample : string, +proc zipfsLanguageDetector*(comparisonLangs : seq[string], sample : string, wordCounter : TableRef[string, CountTable[string]] = nil, words : Table[string, HashSet[string]] = mostCommonWords) : Table[string, float] {.gcsafe.} = let deNoised = comparisonLangs.map(x => statistics[x]) @@ -244,6 +244,80 @@ proc doThing*(comparisonLangs : seq[string], sample : string, break if not hasAnyKeys: result["unknown"] = -1 + +proc createCStringArray(a : openArray[string] | seq[string]) : (ptr UncheckedArray[cstring], uint16) = + let length = a.len() + var sum = 0 + if length == 0: + let newArray = cast[ptr UncheckedArray[cstring]](create(cstring, 1)) + return (newArray, 0) + for i in a: + sum+=i.len() + var newArray = cast[ptr UncheckedArray[cstring]](create(cstring, sum)) + + for i in 0 .. a.high: + newArray[i] = cstring a[i] + return (newArray, uint16 length-1) + + proc makeResult*(oldResult : Table[string, float]) : seq[(string, float)] = result = oldResult.pairs().toSeq() result.sort((x,y)=> cmp(x[1], y[1])) + +proc zipfs_language_detector*(languages : ptr UncheckedArray[cstring], + languages_count : uint64, + sample : cstring, + successful : ptr bool, + length_output : ptr uint64, + result_buffer_float : ptr ptr UncheckedArray[float32], + result : ptr ptr UncheckedArray[cstring] + ) {.exportc.} = + echo "0.1" + let languages = languages.toOpenArray(0, int(languages_count)).toSeq().map(x=> $x) + let sample = $sample + echo "0.2" + let output = zipfsLanguageDetector(languages, sample) + echo "0.7" + let resultPairs = makeResult output + length_output[] = cuint(resultPairs.high) + echo "1" + successful[] = length_output[] == languages_count + echo "2" + result[] = cast[ptr UncheckedArray[cstring]](create(cstring, sizeof(cstring)*resultPairs.high)) + var i = 0 + echo "4" + for (language, _) in resultPairs: + var newCstring = cast[cstring](create(byte, language.high)) + copyMem(newCstring, addr language[0], language.len) + result[][i] = newCstring + i+=1 + echo "5" + + let floatsize = sizeof(float32)*(int(length_output[])+2) + let floatResult = cast[ptr UncheckedArray[float32]](create(float32, floatsize)) + + + for x in 0 .. resultPairs.high: + floatResult[x] = resultPairs[x][1] + echo "6" + result_buffer_float[] = floatResult + echo "7" +#[let testLanguages = createCStringArray(@["sv", "en"]) +var successful = false +var lengthResult : cuint = 0 +var result : ptr UncheckedArray[cstring] +var floatResult : ptr UncheckedArray[float32] + +zipfs_language_detector( + testLanguages[0], + cuint testLanguages[1], + cstring "the quick brown fox jumps over the lazy red dog", + addr successful, + addr lengthResult, + addr floatResult, + addr result) + +echo successful +echo floatResult.toOpenArray(0, int(lengthResult)).toSeq() +echo result.toOpenArray(0, int(lengthResult)).toSeq() +#]# diff --git a/nim/scoring b/nim/scoring index 7bab42b..4c1dad7 100755 Binary files a/nim/scoring and b/nim/scoring differ diff --git a/nim/scoring.nim b/nim/scoring.nim index ced00d5..9197940 100644 --- a/nim/scoring.nim +++ b/nim/scoring.nim @@ -151,7 +151,7 @@ proc createWordScore*(words : Table[string, HashSet[string]], beFast = false) : continue fastCounter.inc(correctLanguage) - let result = makeResult doThing(languages, sample, wordCounter = wordCounts, words) + let result = makeResult zipfsLanguageDetector(languages, sample, wordCounter = wordCounts, words) let correct = result[0][0] == correctLanguage if correct: langToAccuracy[correctLanguage].correct+=1