Added C bindings, a Makefile, and a C test.

This commit is contained in:
user 2024-08-10 00:17:31 -04:00
parent d90b4457cd
commit 8c43b32e7d
13 changed files with 152 additions and 13 deletions

11
Makefile Normal file
View file

@ -0,0 +1,11 @@
# Variables
# Compiler executable; override on the command line, e.g. `make nim=/opt/nim/bin/nim`.
nim = nim
# Builds a static library with malloc-backed allocation so C callers can free() returned buffers.
nim_flags = c -d:useMalloc --mm:arc -d:debug --maxLoopIterationsVM:20000000 --app:staticlib --o:build/libzipfs-ld.a
# Entry module, given without the .nim extension.
nim_file = nim/main
# Default target
.PHONY: all
all:./build/libzipfs-ld.a
# Build target: rebuilt whenever the entry module changes.
# NOTE(review): modules imported by the entry module are not tracked here — confirm whether they should be listed.
./build/libzipfs-ld.a: $(nim_file).nim
	$(nim) $(nim_flags) $(nim_file)

BIN
build/libzipfs-ld.a Normal file

Binary file not shown.

12
build/zipfs-ld.h Normal file
View file

@ -0,0 +1,12 @@
#ifndef ZIPFS_LD_H
#define ZIPFS_LD_H

#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Scores `input_string` against each of the given languages.
 *
 * languages           - array of NUL-terminated language codes.
 * languages_count     - index of the LAST entry of `languages` (entries
 *                       0..languages_count inclusive are read), not the
 *                       number of entries.
 * input_string        - NUL-terminated text to classify.
 * successful          - out: set to whether *length_output equals
 *                       languages_count.
 * length_output       - out: index of the LAST valid entry of the two result
 *                       arrays (so they hold *length_output + 1 entries).
 * result_buffer_float - out: newly allocated array of scores, parallel to
 *                       `result`.
 * result              - out: newly allocated array of newly allocated
 *                       language-name strings, ordered by ascending score.
 *
 * The bundled C test releases every string, then both arrays, with free().
 */
void zipfs_language_detector(
    char** languages,
    uint64_t languages_count,
    char* input_string,
    bool* successful,
    uint64_t* length_output,
    float** result_buffer_float,
    char*** result
);

#ifdef __cplusplus
}
#endif

#endif /* ZIPFS_LD_H */

BIN
ctest/a.out Executable file

Binary file not shown.

37
ctest/test.c Normal file
View file

@ -0,0 +1,37 @@
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include "./zipfs-ld.h"
#include <stdlib.h>
/*
 * Smoke test for the C binding: runs the detector ten times on a fixed
 * sample, prints every returned language/score pair, and frees the buffers
 * the library handed back.
 */
int main(void){
    char* languages[] = {"sv", "en", "de", "nl"};
    char* test_string = " De vragen en manier waarop ze zijn opgesteld, zijn een nieuw niveau van slecht. . ";
    /* The binding reads entries 0..language_count inclusive, so this is the
       index of the last language rather than the number of languages. */
    uint64_t language_count = sizeof(languages) / sizeof(languages[0]) - 1;
    bool worked = false;
    uint64_t length_output = 0;
    char** result = NULL;
    float* float_result = NULL;
    for (int i = 0; i != 10; i++){
        zipfs_language_detector(
            languages,
            language_count,
            test_string,
            &worked,
            &length_output,
            &float_result,
            &result
        );
        /* length_output is the last valid index, hence the +1. The index is
           unsigned to match it and avoid a signed/unsigned comparison. */
        for (uint64_t s = 0; s != length_output + 1; s++){
            printf("%s\n", result[s]);
            printf("%f\n", float_result[s]);
            free(result[s]);
            printf("s: %i\n", (int)s);
        }
        free(result);
        free(float_result);
    }
    return 0;
}

2
data/.gitignore vendored
View file

@ -1 +1 @@
mostCommonWords.json

File diff suppressed because one or more lines are too long

View file

@ -2,4 +2,8 @@
-- Collect every distinct language present in WordScore into a temporary table.
CREATE temporary TABLE tmptable AS
SELECT DISTINCT(language) from WordScore;
-- List the collected languages.
select tmptable.language from tmptable;
-- Pair each language with a word chosen by the correlated subquery, which
-- looks at that language's WordScore rows grouped by RootWord and ordered by
-- descending score.
-- NOTE(review): the subquery is used in scalar position, so presumably only
-- its first row reaches the output despite `limit 30` — confirm the intent.
select tmptable.language, (select w.word from
WordScore as w where w.language = tmptable.language
group by w.RootWord order by w.score desc limit 30
) as word from tmptable;

View file

@ -129,16 +129,16 @@ Create Table WordCountTable(
);
Create Table WordScore(
word string not null,
Word string not null,
RootWord string not null,
language string not null,
occurrence int not null,
occurrencePerSample float not null,
isolationPercentage float not null,
score float not null,
stage string not null,
samples int not null,
unique(word)
Language string not null,
Occurrence int not null,
OccurrencePerSample float not null,
IsolationPercentage float not null,
Score float not null,
Stage string not null,
Samples int not null,
unique(Word)
);

BIN
nim/main Executable file

Binary file not shown.

View file

@ -148,7 +148,7 @@ proc neighborDistance(a : Table[Rune, float], b : Table[Rune, float]) : float =
resultBuffer[i] = (cubic(abs(a[char]-distance)))
return resultBuffer.foldl(a+b)
proc doThing*(comparisonLangs : seq[string], sample : string,
proc zipfsLanguageDetector*(comparisonLangs : seq[string], sample : string,
wordCounter : TableRef[string, CountTable[string]] = nil,
words : Table[string, HashSet[string]] = mostCommonWords) : Table[string, float] {.gcsafe.} =
let deNoised = comparisonLangs.map(x => statistics[x])
@ -244,6 +244,80 @@ proc doThing*(comparisonLangs : seq[string], sample : string,
break
if not hasAnyKeys:
result["unknown"] = -1
proc createCStringArray(a : openArray[string] | seq[string]) : (ptr UncheckedArray[cstring], uint16) =
  ## Builds a zero-initialised C array holding one `cstring` per element of `a`.
  ##
  ## Returns the array together with the index of its last element
  ## (`a.len - 1`); an empty input yields a one-slot array and index `0`.
  ##
  ## The stored `cstring`s are views into the strings of `a`, not copies, so
  ## they are only usable while `a` is alive and unmodified.
  let length = a.len()
  # One slot per string. `create` requires a positive count, so an empty
  # input still gets a single zeroed slot.
  let newArray = cast[ptr UncheckedArray[cstring]](create(cstring, max(length, 1)))
  if length == 0:
    return (newArray, 0'u16)
  for i in 0 .. a.high:
    newArray[i] = cstring a[i]
  return (newArray, uint16(length - 1))
proc makeResult*(oldResult : Table[string, float]) : seq[(string, float)] =
  ## Flattens a language -> score table into `(language, score)` pairs
  ## ordered by ascending score.
  for language, score in oldResult.pairs():
    result.add((language, score))
  result.sort(proc (left, right : (string, float)) : int = cmp(left[1], right[1]))
proc zipfs_language_detector*(languages : ptr UncheckedArray[cstring],
    languages_count : uint64,
    sample : cstring,
    successful : ptr bool,
    length_output : ptr uint64,
    result_buffer_float : ptr ptr UncheckedArray[float32],
    result : ptr ptr UncheckedArray[cstring]
    ) {.exportc, cdecl.} =
  ## C entry point around `zipfsLanguageDetector`.
  ##
  ## * `languages` / `languages_count`: language codes to compare against;
  ##   entries `0 .. languages_count` (inclusive) are read, i.e. the count
  ##   argument is the index of the last entry.
  ## * `sample`: text to classify.
  ## * `length_output`: receives the index of the last valid result entry.
  ## * `successful`: receives whether `length_output[]` equals
  ##   `languages_count` (and at least one result exists).
  ## * `result`: receives a newly allocated array of newly allocated,
  ##   NUL-terminated language names, ordered by ascending score.
  ## * `result_buffer_float`: receives a newly allocated array of the
  ##   matching scores.
  ##
  ## All buffers come from `create`; ownership passes to the caller.
  ## NOTE(review): an exception raised by the detector is not caught here and
  ## would cross the C boundary — confirm whether it should be reported via
  ## `successful` instead.
  var comparisonLangs = newSeqOfCap[string](int(languages_count) + 1)
  for i in 0 .. int(languages_count):
    comparisonLangs.add $languages[i]

  let resultPairs = makeResult zipfsLanguageDetector(comparisonLangs, $sample)
  let count = resultPairs.len

  # Callers iterate 0 .. length_output[] inclusive, so always hand back at
  # least one zeroed slot; an empty result then reads as a nil name and 0.0.
  let slots = max(count, 1)
  length_output[] = uint64(slots - 1)
  successful[] = count > 0 and length_output[] == languages_count

  # `create` takes an element count (not a byte size) and zeroes the memory.
  let names = cast[ptr UncheckedArray[cstring]](create(cstring, slots))
  let scores = cast[ptr UncheckedArray[float32]](create(float32, slots))
  var slot = 0
  for (language, score) in resultPairs:
    # One extra zeroed byte keeps the copy NUL-terminated for C.
    let raw = create(byte, language.len + 1)
    if language.len > 0:
      copyMem(raw, addr language[0], language.len)
    names[slot] = cast[cstring](raw)
    scores[slot] = float32(score)
    inc slot

  result[] = names
  result_buffer_float[] = scores
#[let testLanguages = createCStringArray(@["sv", "en"])
var successful = false
var lengthResult : cuint = 0
var result : ptr UncheckedArray[cstring]
var floatResult : ptr UncheckedArray[float32]
zipfs_language_detector(
testLanguages[0],
cuint testLanguages[1],
cstring "the quick brown fox jumps over the lazy red dog",
addr successful,
addr lengthResult,
addr floatResult,
addr result)
echo successful
echo floatResult.toOpenArray(0, int(lengthResult)).toSeq()
echo result.toOpenArray(0, int(lengthResult)).toSeq()
#]#

Binary file not shown.

View file

@ -151,7 +151,7 @@ proc createWordScore*(words : Table[string, HashSet[string]], beFast = false) :
continue
fastCounter.inc(correctLanguage)
let result = makeResult doThing(languages, sample, wordCounter = wordCounts, words)
let result = makeResult zipfsLanguageDetector(languages, sample, wordCounter = wordCounts, words)
let correct = result[0][0] == correctLanguage
if correct:
langToAccuracy[correctLanguage].correct+=1