44 lines
1 KiB
Nim
44 lines
1 KiB
Nim
import db_connector/db_sqlite
|
|
import bitops
|
|
import streams
|
|
import os
|
|
import strutils
|
|
import sugar
|
|
import sequtils
|
|
import times
|
|
import tables
|
|
import json
|
|
## Builds `../data/mostCommonWords.json`: for every word-list file found in
## `../words/data/wordfrequency.info/`, picks the first 200 usable words and
## stores them under the language key obtained by inverting the mapping in
## `../data/wikiToMulti.json`.

const
  wikiToMultiPath = "../data/wikiToMulti.json"
  wordListDir = "../words/data/wordfrequency.info/"
  outputPath = "../data/mostCommonWords.json"
  maxLinesRead = 301      ## Only the first 301 lines of a list are considered.
  wordsPerLanguage = 200  ## Exact number of words required per language.
  minWordLen = 3          ## Words of 2 bytes or fewer are skipped.

proc pickCommonWords(lines: openArray[string]; language: string): seq[string] =
  ## Returns up to `wordsPerLanguage` entries of `lines`, in their original
  ## order, skipping entries shorter than `minWordLen` bytes, entries equal to
  ## `language`, and entries that were already selected.
  for word in lines:
    if result.len == wordsPerLanguage: break
    if word.len < minWordLen: continue
    if word == language: continue
    if word in result: continue
    result.add word

let w2m = parseJson readFile(wikiToMultiPath)

# Invert the mapping: value found in wikiToMulti.json -> its key.
var backwards = initTable[string, string]()
for key, val in w2m.pairs:
  backwards[val.getStr()] = key

var output = newJObject()

for file in walkDir(wordListDir):
  # walkDir also yields sub-directories; readFile on one would abort the run.
  if file.kind notin {pcFile, pcLinkToFile}: continue

  # The language code is the file name up to its first '.'.
  let languageRaw = file.path.extractFilename.split(".")[0]

  if languageRaw notin backwards:
    #TODO: FIX CHINESE (ZH)
    echo languageRaw
    continue
  let language = backwards[languageRaw]

  # splitLines copes with both LF and CRLF endings, and the slice is clamped
  # so that a list shorter than `maxLinesRead` lines does not raise
  # IndexDefect.
  let lines = readFile(file.path).splitLines()
  let words = lines[0 ..< min(lines.len, maxLinesRead)]

  let wordsAdded = pickCommonWords(words, language)
  if wordsAdded.len != wordsPerLanguage:
    # ValueError is a CatchableError, so existing handlers keep working.
    raise newException(ValueError,
      "expected " & $wordsPerLanguage & " usable words for '" & language &
      "' in " & file.path & ", found only " & $wordsAdded.len)

  output[language] = %wordsAdded

writeFile(outputPath, $output)