Added C bindings, a Makefile, and a C test.
This commit is contained in:
parent
d90b4457cd
commit
8c43b32e7d
13 changed files with 152 additions and 13 deletions
11
Makefile
Normal file
11
Makefile
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Variables
# nim       - compiler executable; override with `make nim=/path/to/nim`.
# nim_flags - builds a static C library with ARC memory management.
#             -d:useMalloc routes Nim allocations through the C allocator,
#             which is what lets the C test release result buffers with free().
# nim_file  - entry module (without the .nim extension).
nim = nim
nim_flags = c -d:useMalloc --mm:arc -d:debug --maxLoopIterationsVM:20000000 --app:staticlib --o:build/libzipfs-ld.a
nim_file = nim/main

# Default target
# `all` names no real file, so it is declared phony.
.PHONY: all
all: ./build/libzipfs-ld.a

# Build target
# Depends on every Nim source so the archive is rebuilt when any of them changes.
./build/libzipfs-ld.a: $(wildcard nim/*.nim)
	$(nim) $(nim_flags) $(nim_file)
|
BIN
build/libzipfs-ld.a
Normal file
BIN
build/libzipfs-ld.a
Normal file
Binary file not shown.
12
build/zipfs-ld.h
Normal file
12
build/zipfs-ld.h
Normal file
|
@ -0,0 +1,12 @@
|
|||
|
||||
#ifndef ZIPFS_LD_H
#define ZIPFS_LD_H

/* <stdint.h> provides uint64_t; <stdbool.h> provides bool. */
#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Scores `input_string` against a set of candidate languages.
 *
 * languages            Array of NUL-terminated language codes (e.g. "sv").
 * languages_count      Index of the LAST entry in `languages`, not the number
 *                      of entries: the implementation reads
 *                      languages[0] .. languages[languages_count] inclusive.
 * input_string         NUL-terminated text to classify.
 * successful           Out: set to true when *length_output equals
 *                      languages_count.
 * length_output        Out: index of the LAST entry in the two result arrays
 *                      (again an index, not a count).
 * result_buffer_float  Out: array of scores, parallel to `result`.
 * result               Out: array of NUL-terminated language names, ordered
 *                      by ascending score.
 *
 * The result arrays and every string in `result` are allocated by the
 * library. ctest/test.c releases them with free(); that relies on the library
 * being built with -d:useMalloc, as the Makefile does.
 */
void zipfs_language_detector(
    char** languages,
    uint64_t languages_count,
    char* input_string,
    bool* successful,
    uint64_t* length_output,
    float** result_buffer_float,
    char*** result
);

#ifdef __cplusplus
}
#endif

#endif /* ZIPFS_LD_H */
|
BIN
ctest/a.out
Executable file
BIN
ctest/a.out
Executable file
Binary file not shown.
37
ctest/test.c
Normal file
37
ctest/test.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./zipfs-ld.h"
|
||||
#include <stdlib.h>
|
||||
/*
 * Smoke test for the C binding: runs the detector ten times on a Dutch
 * sentence, prints every (language, score) pair and releases the buffers the
 * library handed back.
 *
 * NOTE(review): Nim static libraries normally expect NimMain() to be called
 * once before any exported proc is used; confirm whether that is needed here.
 */
int main(){
    char* languages[] = {"sv", "en", "de", "nl"};
    char* test_string = " De vragen en manier waarop ze zijn opgesteld, zijn een nieuw niveau van slecht. . ";
    /* The binding takes the index of the last language, not the element count. */
    uint64_t language_count = sizeof(languages) / sizeof(languages[0]) - 1;
    bool worked = false;
    uint64_t length_output = 0;
    char** result = NULL;
    float* float_result = NULL;
    int i;
    for (i = 0; i != 10; i++){
        /* Reset the outputs so a call that writes nothing is detectable. */
        worked = false;
        length_output = 0;
        result = NULL;
        float_result = NULL;

        zipfs_language_detector(
            languages,
            language_count,
            test_string,
            &worked,
            &length_output,
            &float_result,
            &result
        );

        if (result == NULL || float_result == NULL){
            fprintf(stderr, "iteration %i: no result buffers were returned\n", i);
            free(result);
            free(float_result);
            return 1;
        }
        if (!worked){
            fprintf(stderr, "iteration %i: result size differs from the number of languages\n", i);
        }

        /* length_output is the last valid index, hence the inclusive bound. */
        uint64_t s;
        for (s = 0; s <= length_output; s++){
            printf("%s\n", result[s]);
            printf("%f\n", float_result[s]);
            free(result[s]);
            printf("s: %i\n", (int)s);
        }
        free(result);
        free(float_result);
    }
    return 0;
}
|
2
data/.gitignore
vendored
2
data/.gitignore
vendored
|
@ -1 +1 @@
|
|||
mostCommonWords.json
|
||||
|
||||
|
|
1
data/mostCommonWords.json
Normal file
1
data/mostCommonWords.json
Normal file
File diff suppressed because one or more lines are too long
|
@ -2,4 +2,8 @@
|
|||
CREATE temporary TABLE tmptable AS
|
||||
SELECT DISTINCT(language) from WordScore;
|
||||
|
||||
select tmptable.language from tmptable;
|
||||
select tmptable.language, (select w.word from
|
||||
WordScore as w where w.language = tmptable.language
|
||||
group by w.RootWord order by w.score desc limit 30
|
||||
|
||||
) as word from tmptable;
|
||||
|
|
|
@ -129,16 +129,16 @@ Create Table WordCountTable(
|
|||
);
|
||||
|
||||
Create Table WordScore(
|
||||
word string not null,
|
||||
Word string not null,
|
||||
RootWord string not null,
|
||||
language string not null,
|
||||
occurrence int not null,
|
||||
occurrencePerSample float not null,
|
||||
isolationPercentage float not null,
|
||||
score float not null,
|
||||
stage string not null,
|
||||
samples int not null,
|
||||
unique(word)
|
||||
Language string not null,
|
||||
Occurrence int not null,
|
||||
OccurrencePerSample float not null,
|
||||
IsolationPercentage float not null,
|
||||
Score float not null,
|
||||
Stage string not null,
|
||||
Samples int not null,
|
||||
unique(Word)
|
||||
);
|
||||
|
||||
|
||||
|
|
BIN
nim/main
Executable file
BIN
nim/main
Executable file
Binary file not shown.
76
nim/main.nim
76
nim/main.nim
|
@ -148,7 +148,7 @@ proc neighborDistance(a : Table[Rune, float], b : Table[Rune, float]) : float =
|
|||
resultBuffer[i] = (cubic(abs(a[char]-distance)))
|
||||
return resultBuffer.foldl(a+b)
|
||||
|
||||
proc doThing*(comparisonLangs : seq[string], sample : string,
|
||||
proc zipfsLanguageDetector*(comparisonLangs : seq[string], sample : string,
|
||||
wordCounter : TableRef[string, CountTable[string]] = nil,
|
||||
words : Table[string, HashSet[string]] = mostCommonWords) : Table[string, float] {.gcsafe.} =
|
||||
let deNoised = comparisonLangs.map(x => statistics[x])
|
||||
|
@ -244,6 +244,80 @@ proc doThing*(comparisonLangs : seq[string], sample : string,
|
|||
break
|
||||
if not hasAnyKeys:
|
||||
result["unknown"] = -1
|
||||
|
||||
proc createCStringArray(a : openArray[string] | seq[string]) : (ptr UncheckedArray[cstring], uint16) =
  ## Copies `a` into a manually allocated array of C strings.
  ##
  ## Returns the array together with the index of its last element (`a.high`),
  ## which is the convention `zipfs_language_detector` expects for its
  ## `languages_count` argument. For an empty input a single zeroed slot is
  ## allocated and 0 is returned, so an empty input cannot be told apart from
  ## a one-element input by the returned index alone.
  ##
  ## Every string is duplicated into its own NUL-terminated buffer, so the
  ## result stays valid after `a` is freed or mutated. The array and each of
  ## its strings are owned by the caller.
  let length = a.len()
  if length == 0:
    return (cast[ptr UncheckedArray[cstring]](create(cstring, 1)), 0'u16)

  # One slot per string (the slot count is the number of strings, not the
  # total number of characters).
  let newArray = cast[ptr UncheckedArray[cstring]](create(cstring, length))

  for i in 0 .. a.high:
    let size = a[i].len()
    # `create` zero-fills, so the extra byte is already the NUL terminator.
    let buffer = create(char, size + 1)
    if size > 0:
      copyMem(buffer, unsafeAddr a[i][0], size)
    newArray[i] = cast[cstring](buffer)

  return (newArray, uint16(length - 1))
|
||||
|
||||
|
||||
proc makeResult*(oldResult : Table[string, float]) : seq[(string, float)] =
  ## Flattens a language -> score table into a sequence of
  ## (language, score) pairs ordered by ascending score.
  for language, score in oldResult.pairs():
    result.add((language, score))
  result.sort(proc (left, right : (string, float)) : int = cmp(left[1], right[1]))
|
||||
|
||||
proc zipfs_language_detector*(languages : ptr UncheckedArray[cstring],
                              languages_count : uint64,
                              sample : cstring,
                              successful : ptr bool,
                              length_output : ptr uint64,
                              result_buffer_float : ptr ptr UncheckedArray[float32],
                              result : ptr ptr UncheckedArray[cstring]
                              ) {.exportc, cdecl.} =
  ## C entry point around `zipfsLanguageDetector`.
  ##
  ## * `languages` / `languages_count`: candidate language codes.
  ##   `languages_count` is the index of the LAST entry, so entries
  ##   `0 .. languages_count` are read.
  ## * `sample`: NUL-terminated text to classify.
  ## * `successful`: set to true when the last result index equals
  ##   `languages_count`; false otherwise, including when detection raised.
  ## * `length_output`: receives the index of the LAST result entry.
  ## * `result_buffer_float` / `result`: receive parallel arrays of scores and
  ##   NUL-terminated language names, ordered by ascending score.
  ##
  ## Both arrays and every name are allocated with `create` and owned by the
  ## caller. At least one slot is always allocated, so reading index 0 stays
  ## in bounds even when there are no results (slot 0 is then an empty string
  ## with score 0).

  # `languages_count` is an inclusive upper index, hence count + 1 entries.
  var comparisonLangs = newSeqOfCap[string](int(languages_count) + 1)
  for i in 0 .. int(languages_count):
    comparisonLangs.add($languages[i])

  # Never let a Nim exception unwind into the C caller; report failure through
  # `successful` instead.
  var resultPairs : seq[(string, float)]
  try:
    resultPairs = makeResult zipfsLanguageDetector(comparisonLangs, $sample)
  except CatchableError:
    resultPairs.setLen(0)

  let count = resultPairs.len
  # `create` takes an element count (not a byte size) and zero-fills.
  let slots = max(count, 1)
  let names = cast[ptr UncheckedArray[cstring]](create(cstring, slots))
  let scores = cast[ptr UncheckedArray[float32]](create(float32, slots))

  for i in 0 ..< count:
    let size = resultPairs[i][0].len
    # One extra zeroed byte acts as the NUL terminator.
    let buffer = create(char, size + 1)
    if size > 0:
      copyMem(buffer, unsafeAddr resultPairs[i][0][0], size)
    names[i] = cast[cstring](buffer)
    scores[i] = float32(resultPairs[i][1])

  if count == 0:
    # Keep slot 0 printable and freeable for callers that always read it.
    names[0] = cast[cstring](create(char, 1))

  length_output[] = uint64(max(count - 1, 0))
  successful[] = count > 0 and length_output[] == languages_count
  result_buffer_float[] = scores
  result[] = names
|
||||
#[let testLanguages = createCStringArray(@["sv", "en"])
|
||||
var successful = false
|
||||
var lengthResult : cuint = 0
|
||||
var result : ptr UncheckedArray[cstring]
|
||||
var floatResult : ptr UncheckedArray[float32]
|
||||
|
||||
zipfs_language_detector(
|
||||
testLanguages[0],
|
||||
cuint testLanguages[1],
|
||||
cstring "the quick brown fox jumps over the lazy red dog",
|
||||
addr successful,
|
||||
addr lengthResult,
|
||||
addr floatResult,
|
||||
addr result)
|
||||
|
||||
echo successful
|
||||
echo floatResult.toOpenArray(0, int(lengthResult)).toSeq()
|
||||
echo result.toOpenArray(0, int(lengthResult)).toSeq()
|
||||
#]#
|
||||
|
|
BIN
nim/scoring
BIN
nim/scoring
Binary file not shown.
|
@ -151,7 +151,7 @@ proc createWordScore*(words : Table[string, HashSet[string]], beFast = false) :
|
|||
continue
|
||||
fastCounter.inc(correctLanguage)
|
||||
|
||||
let result = makeResult doThing(languages, sample, wordCounter = wordCounts, words)
|
||||
let result = makeResult zipfsLanguageDetector(languages, sample, wordCounter = wordCounts, words)
|
||||
let correct = result[0][0] == correctLanguage
|
||||
if correct:
|
||||
langToAccuracy[correctLanguage].correct+=1
|
||||
|
|
Loading…
Reference in a new issue