-- Zipfs-Law-Language-Detector/data/training/schema.sql
-- 2024-10-11 05:33:32 -04:00

-- 146 lines
-- 4 KiB
-- SQL

-- Accuracy: Correct / Incorrect / Faliures counters for one Language, attached
-- to a MacroScore row through MacroScoreId.
-- (Column meanings are inferred from their names -- confirm against the code
-- that writes this table.)
--
-- Language is declared Text rather than String: in SQLite the type name
-- "STRING" yields NUMERIC affinity, so text that looks like a number would be
-- stored as a number. Text also matches the declaration of Score.Language.
--
-- NOTE(review): assuming SQLite, a foreign key's parent key must be a named
-- PRIMARY KEY / UNIQUE column, not the implicit rowid. With foreign-key
-- enforcement switched on, the reference below would fail with a
-- "foreign key mismatch" error -- confirm whether enforcement is used.
CREATE TABLE Accuracy (
    Language     Text    NOT NULL,
    MacroScoreId Integer NOT NULL,
    Correct      Integer NOT NULL,
    Incorrect    Integer NOT NULL,
    Faliures     Integer NOT NULL,  -- spelling kept: the column name is part of the schema contract
    FOREIGN KEY (MacroScoreId) REFERENCES MacroScore (RowId)
);
-- CountTable: (StrKey, IntVal) pairs tagged with a Type and a Language, each
-- row pointing at both a MacroScore row and an Accuracy row.
-- (What Type distinguishes is not visible in this file -- confirm with the
-- code that populates the table.)
--
-- StrKey, Type and Language are declared Text rather than String: in SQLite
-- the type name "STRING" yields NUMERIC affinity, so a key such as '007' or
-- '1e3' would be silently converted to a number. Text keeps it verbatim and
-- matches the declaration of Score.Language.
--
-- NOTE(review): assuming SQLite, the implicit rowid is not a valid foreign-key
-- parent; with enforcement on, both references below would raise
-- "foreign key mismatch". Confirm whether enforcement is used.
CREATE TABLE CountTable (
    MacroScoreId Integer NOT NULL,
    AccuracyId   Integer NOT NULL,
    StrKey       Text    NOT NULL,
    IntVal       Integer NOT NULL,
    Type         Text    NOT NULL,
    Language     Text    NOT NULL,
    FOREIGN KEY (MacroScoreId) REFERENCES MacroScore (RowId),
    FOREIGN KEY (AccuracyId)   REFERENCES Accuracy (RowId)
);
-- RunningStat: one bundle of summary statistics (maximum, minimum, sum, mean
-- and standard deviation), linked to a MacroScore row through MacroScoreId.
-- MacroScore in turn points back at RunningStat rows from several of its own
-- columns, so the two tables reference each other.
--
-- NOTE(review): assuming SQLite, the implicit rowid is not a valid foreign-key
-- parent; with enforcement on, the reference below would raise
-- "foreign key mismatch". Confirm whether enforcement is used.
CREATE TABLE RunningStat (
    MacroScoreId Integer NOT NULL,
    Max          Float   NOT NULL,
    Min          Float   NOT NULL,
    Sum          Float   NOT NULL,
    Mean         Float   NOT NULL,
    StdDeviation Float   NOT NULL,
    FOREIGN KEY (MacroScoreId) REFERENCES MacroScore (RowId)
);
-- Score: per-Language utilization figures, linked to a MacroScore row and to
-- an Accuracy row.
-- (The exact meaning of each metric is not visible in this file; the comments
-- below only group the columns by name.)
--
-- The two FOREIGN KEY clauses are now separated by a comma. The original
-- omitted it; the documented CREATE TABLE grammar requires a separator between
-- table constraints, even if some parsers tolerate its absence.
--
-- NOTE(review): assuming SQLite, the implicit rowid is not a valid foreign-key
-- parent; with enforcement on, both references below would raise
-- "foreign key mismatch". Confirm whether enforcement is used.
CREATE TABLE Score (
    MacroScoreId             Integer NOT NULL,
    AccuracyRowId            Integer NOT NULL,
    Language                 text    NOT NULL,
    FaliureRate              float   NOT NULL,  -- spelling kept: column names are part of the schema contract
    -- overall word utilization
    TotalWordUtilization     Integer NOT NULL,
    UtlizationPerWord        float   NOT NULL,
    -- "good" utilization
    TotalGoodWordUtilization Integer NOT NULL,
    GoodUtilizationPerWord   float   NOT NULL,
    PercentGoodUtilization   float   NOT NULL,
    -- "bad" utilization
    TotalBadWordUtilization  Integer NOT NULL,
    BadUtilizationPerWord    float   NOT NULL,
    PercentBadUtilization    float   NOT NULL,
    -- used-word percentages
    UsedWordPercentGood      float   NOT NULL,
    UsedWordPercentBad       float   NOT NULL,
    FOREIGN KEY (MacroScoreId)  REFERENCES MacroScore (RowId),
    FOREIGN KEY (AccuracyRowId) REFERENCES Accuracy (Rowid)
);
-- Words: (StrKey, Language) pairs belonging to a MacroScore row.
-- (Presumably StrKey is the word itself -- confirm with the loader code.)
--
-- StrKey and Language are declared Text rather than String: in SQLite the
-- type name "STRING" yields NUMERIC affinity, so a numeric-looking token such
-- as '01' or '3.0' would be stored as a number instead of the original text.
-- Text also matches the declaration of Score.Language.
--
-- NOTE(review): assuming SQLite, the implicit rowid is not a valid foreign-key
-- parent; with enforcement on, the reference below would raise
-- "foreign key mismatch". Confirm whether enforcement is used.
CREATE TABLE Words (
    StrKey       Text    NOT NULL,
    Language     Text    NOT NULL,
    MacroScoreId Integer NOT NULL,
    FOREIGN KEY (MacroScoreId) REFERENCES MacroScore (RowId)
);
-- WordSums: a set of unique Sha256 blobs.
-- (Presumably each blob is the SHA-256 digest of a word list that has already
-- been loaded -- confirm with the loader code.)
CREATE TABLE WordSums (
    -- Serves as a lock for Words: a value that is already present cannot be
    -- inserted again, which prevents duplicate loads.
    Sha256 blob NOT NULL UNIQUE
);
-- MacroScore: one aggregate result row for a Generation.
--
-- Apart from Score and WordCount, every metric column holds the id of a
-- RunningStat row (see the FOREIGN KEY clauses below) rather than a value.
--
-- Rows are first inserted with their metric columns NULL and filled in by a
-- later UPDATE, so that child rows can be inserted referencing this row in
-- the meantime. That is why the metric columns are nullable.
--
-- NOTE(review): IsPopulated defaults to 1 even though rows start out
-- unpopulated per the note above -- confirm that inserters set it explicitly.
--
-- NOTE(review): assuming SQLite, the implicit rowid is not a valid foreign-key
-- parent; with enforcement on, the references below would raise
-- "foreign key mismatch". Confirm whether enforcement is used.
CREATE TABLE MacroScore (
    Score                     Float,
    WordCount                 Integer,

    -- ids of RunningStat rows
    FaliureRates              Integer,
    TotalWordUtilizations     Integer,
    UtlizationPerWords        Integer,
    TotalGoodWordUtilizations Integer,
    GoodUtilizationPerWords   Integer,
    PercentGoodUtilizations   Integer,
    TotalBadWordUtilizations  Integer,
    BadUtilizationPerWords    Integer,
    PercentBadUtilizations    Integer,
    UsedWordPercentsBad       Integer,
    UsedWordPercentsGood      Integer,

    IsPopulated               Integer NOT NULL DEFAULT 1,
    Generation                Integer NOT NULL,

    FOREIGN KEY (Generation)                REFERENCES Generation (RowId),
    FOREIGN KEY (FaliureRates)              REFERENCES RunningStat (RowId),
    FOREIGN KEY (TotalWordUtilizations)     REFERENCES RunningStat (RowId),
    FOREIGN KEY (UtlizationPerWords)        REFERENCES RunningStat (RowId),
    FOREIGN KEY (TotalGoodWordUtilizations) REFERENCES RunningStat (RowId),
    FOREIGN KEY (GoodUtilizationPerWords)   REFERENCES RunningStat (RowId),
    FOREIGN KEY (PercentGoodUtilizations)   REFERENCES RunningStat (RowId),
    FOREIGN KEY (TotalBadWordUtilizations)  REFERENCES RunningStat (RowId),
    FOREIGN KEY (BadUtilizationPerWords)    REFERENCES RunningStat (RowId),
    FOREIGN KEY (PercentBadUtilizations)    REFERENCES RunningStat (RowId),
    FOREIGN KEY (UsedWordPercentsBad)       REFERENCES RunningStat (RowId),
    FOREIGN KEY (UsedWordPercentsGood)      REFERENCES RunningStat (RowId)
);
-- Generation: start and end time of one generation; MacroScore rows point
-- here through their Generation column.
-- NOTE(review): assuming SQLite, "DateTime" is not a native type and gives
-- the columns NUMERIC affinity, so the stored format depends on the writer.
CREATE TABLE Generation (
    TimeStarted DateTime NOT NULL,
    TimeEnded   DateTime NOT NULL
);
-- WordCountTable: a Count for one Language, attached to a WordScore row.
-- (Presumably the number of times the word was seen in that language --
-- confirm with the code that writes this table.)
--
-- Language is declared Text rather than string: in SQLite the type name
-- "STRING" yields NUMERIC affinity, so numeric-looking text would be stored as
-- a number. Text also matches the declaration of Score.Language.
--
-- NOTE(review): assuming SQLite, the implicit rowid is not a valid foreign-key
-- parent; with enforcement on, the reference below would raise
-- "foreign key mismatch". Confirm whether enforcement is used.
CREATE TABLE WordCountTable (
    Language    Text NOT NULL,
    Count       int  NOT NULL,
    WordScoreId int  NOT NULL,
    FOREIGN KEY (WordScoreId) REFERENCES WordScore (RowId)
);
-- WordScore: one row per distinct Word (enforced by the UNIQUE constraint),
-- carrying its RootWord, Language, occurrence figures, Score and Stage.
-- (The meaning of Stage and of the individual figures is not visible in this
-- file -- confirm with the code that writes this table.)
--
-- Word, RootWord, Language and Stage are declared Text rather than string:
-- in SQLite the type name "STRING" yields NUMERIC affinity, so numeric-looking
-- words would be converted to numbers. For Word that also distorts the UNIQUE
-- check, because distinct spellings such as '1' and '01' would collapse to the
-- same stored value. Text keeps every word verbatim and matches the
-- declaration of Score.Language.
CREATE TABLE WordScore (
    Word                Text  NOT NULL,
    RootWord            Text  NOT NULL,
    Language            Text  NOT NULL,
    Occurrence          int   NOT NULL,
    OccurrencePerSample float NOT NULL,
    IsolationPercentage float NOT NULL,
    Score               float NOT NULL,
    Stage               Text  NOT NULL,
    Samples             int   NOT NULL,
    UNIQUE (Word)
);
-- Secondary indexes. Index names are kept exactly as they were (including
-- their spelling), since other scripts may refer to them by name.

-- Score rows by Language, then by ascending FaliureRate.
create index faliureRates
    on Score (Language, FaliureRate asc);

-- Words rows by Language, then MacroScoreId.
create index LanguagePlusMacroScore
    on Words (Language, MacroScoreId);

-- CountTable rows by their owning MacroScore.
create index CounTableMacroScore
    on CountTable (MacroScoreId);