init
This commit is contained in:
commit
c06a04ed72
15 changed files with 1275 additions and 0 deletions
101
README.md
Normal file
101
README.md
Normal file
|
@ -0,0 +1,101 @@
|
|||
# ArbitraryFileVideoEncoder
|
||||
|
||||
# What is this?
|
||||
|
||||
This project is intended to allow you to encode files to videos, and then upload those videos to anywhere,
|
||||
and use them in the same way you would files.
|
||||
|
||||
Simple concept, and before encoding, perfectly doable, easily.
|
||||
|
||||
Example output:
|
||||
|
||||

|
||||
|
||||
# Machine Learning
|
||||
|
||||
A machine learning training method for reducing transmission corruption was made in training.nim. Due to limitations in arraymancer currently with serialization, the ability to load the model is very buggy and thus not implemented.
|
||||
|
||||
|
||||
# Encoding Standard
|
||||
|
||||
The encoding is relatively simple right now.
|
||||
|
||||
The first 256 blocks are individual colors, they are used as the key for the image, 0 - 255 respectively.
|
||||
Each time a block of that color is identified, it can be referenced by its index within those first 256 key blocks (0 - 255)
|
||||
|
||||
This means the data retention depends on how distinct the individual colors are from one another, and how well each color is preserved (rather than drifting to look like another) through compression
|
||||
|
||||
# Serialization standard
|
||||
|
||||
Currently debug data can be saved into binary files via tensorCeral. The .bin encoding is in tensorCeral.nim and works like this:
|
||||
The first 36 bytes are nine 32-bit unsigned integers (three per tensor), encoded as
|
||||
|
||||
[Dimension X, Dimension Y, The length of the following array [as a redundancy]] * 3
|
||||
The following data, until the end of the file is data in bytes, equal to the dimensions first specified.
|
||||
|
||||
# Usage
|
||||
|
||||
- Implementation
|
||||
|
||||
Used to decode a .bin file to an output, has graphing when compiled with different settings, and an interactive pager. To get graph outputs use -d:colordebug -d:graph, and for pager -d:pager
|
||||
note output can be "" or "-" for stdout
|
||||
use ./implementation [input.bin] [output] [originalfile]
|
||||
|
||||
for training and cl help, they have built in help messages
|
||||
|
||||
- TensorCeral, its purposes are only used within LDPC
|
||||
|
||||
an example usage to test cl encoding is:
|
||||
```bash
|
||||
./cl -e yourFile.zip outFile.bin bestpallet.plte; ./implementation outFile.bin yourFileClone.zip
|
||||
```
|
||||
|
||||
# Unit-Tests
|
||||
Some unit tests have been implemented in unit-test.nim
|
||||
If everything returns positive, then everything should be working
|
||||
|
||||
|
||||
# What is bestpallet.plte?
|
||||
It is the greatest pallet I have generated randomly, used mostly as a reference and a starting ground for all comparisons.
|
||||
|
||||
# Corruption
|
||||
|
||||
Compressing these files then using a simple color comparison algorithm is going to cause transmission data corruption, which can be pretty significant unless you have a good form of data redundancy algorithm.
|
||||
|
||||
Currently with the included pallet, we can get around ~2.7% - 6% corruption; that is the pallet I used in the example image. I included some basic statistical sorting, but it only increases corruption, and LDPC codes seem to make it skyrocket.
|
||||
|
||||
2.7% is not enough to maintain most corruption-redundant data formats' integrity, and thus is insufficient to be used for practical purposes.
|
||||
|
||||
# Future
|
||||
In the future, I would like to include some very important stuff
|
||||
1. R statistical systems, to better identify the incorrect colors
|
||||
2. Custom LDPC codes, this is the key
|
||||
3. Hand-made color pallets
|
||||
4. Refine machine learning into a classification algorithm feedback through implementation
|
||||
|
||||
Once we can achieve data corruption rates low enough for an archive to survive 'transmission' we can start to look at more complex data structures.
|
||||
|
||||
# Graphs
|
||||
|
||||
The graph output of trainingdata. This graph shows various models at different points in their development, showing their accuracy and efficiencies.
|
||||
|
||||

|
||||
|
||||
This is a graph output of implementation, showing the inefficiencies of the current systems:
|
||||
|
||||

|
||||
|
||||
# Special credits
|
||||
All of the following friends and family helped me throughout the time I worked on this. No technical help was given.
|
||||
|
||||
Cassie
|
||||
Morgan
|
||||
Max
|
||||
Lonk
|
||||
Brungo
|
||||
Waka
|
||||
Microgravity
|
||||
Ilrasso
|
||||
Ela
|
||||
Jacko
|
||||
JDL
|
BIN
archivedata/implementation.png
Normal file
BIN
archivedata/implementation.png
Normal file
Binary file not shown.
After ![]() (image error) Size: 118 KiB |
BIN
archivedata/visual.png
Normal file
BIN
archivedata/visual.png
Normal file
Binary file not shown.
After ![]() (image error) Size: 90 KiB |
BIN
bestpallet.plte
Normal file
BIN
bestpallet.plte
Normal file
Binary file not shown.
298
cl.nim
Normal file
298
cl.nim
Normal file
|
@ -0,0 +1,298 @@
|
|||
import strutils
|
||||
import sugar
|
||||
import sequtils
|
||||
import math
|
||||
import arraymancer
|
||||
import random
|
||||
import itertools
|
||||
import simplepng
|
||||
import nimPNG
|
||||
import system
|
||||
import times
|
||||
import tables
|
||||
import streams
|
||||
import os
|
||||
import json
|
||||
import osproc
|
||||
import tensorCeral
|
||||
import strformat
|
||||
|
||||
proc itPad(pad, width: int, start = 0) : iterator() : seq[(int, int)] {.gcsafe.} =
  ## Walks the image in (pad+1)-sized square blocks and yields, for each
  ## block, the list of (row, col) coordinates belonging to that block.
  ## For pad = 0 this degenerates to one coordinate per cell:
  ## (0,0) (1,0) (2,0) (0,1) ...
  ## NOTE(review): `start` is accepted but never read — kept for interface
  ## compatibility.
  result = iterator(): seq[(int, int)] =
    let step = pad + 1
    for col in countup(pad, width, step):
      for row in countup(pad, width, step):
        var cells = newSeqOfCap[(int, int)](step * step)
        for r in row - pad .. row:
          for c in col - pad .. col:
            cells.add((r, c))
        yield cells
|
||||
|
||||
proc genColorMatrix*() : Tensor[byte] =
  ## Builds a 256x3 tensor of random bytes — a random RGB palette with
  ## one row per possible byte value (0..255). Reseeds the RNG from the
  ## CPU clock on every call.
  randomize(cpuTime().int64)
  var flat = newSeqOfCap[byte](256 * 3)
  for _ in 1 .. 256 * 3:
    flat.add rand(0 .. 255).byte
  result = flat.toTensor().reshape(256, 3)
|
||||
|
||||
proc writeRandomImage(width, height : int, path : string) =
|
||||
##Writes a nonsense image
|
||||
var colorTensor = genColorMatrix()
|
||||
var p = initPixels(width, height)
|
||||
let chunky = collect(for x in chunked(collect(for x in colorTensor: x), 3): x)
|
||||
var n = 0
|
||||
for color in p.mitems:
|
||||
let
|
||||
r = chunky[n][0]
|
||||
g = chunky[n][1]
|
||||
b = chunky[n][2]
|
||||
color.setColor(r, g, b, 255)
|
||||
n+=1
|
||||
simplePNG(path, p)
|
||||
|
||||
|
||||
proc fromPng*(input : string, pad = 0, total : int, multiple = true) : Tensor[byte]=
|
||||
## Input = folder which it is in. eg ./out/0/
|
||||
## uses input/1.png as a benchmark for resolution; it is intended to be used within the confines of the rest of the code
|
||||
## total = the total amount files you want to scan
|
||||
## if you want to use just one, multiple = false
|
||||
|
||||
## Returns a tensor of data, organized by blocks. First block, second block, third block, etc
|
||||
##
|
||||
var png : PNGResult[string]
|
||||
var size : int
|
||||
if multiple:
|
||||
png = loadPNG32(&"{input}1.png")
|
||||
size = png.width
|
||||
else:
|
||||
png = loadPNG32(input)
|
||||
size = png.width
|
||||
|
||||
var filesRead = 0
|
||||
var final : seq[byte]
|
||||
proc decode(x : seq[char]) : seq[byte] =
|
||||
#takes the data, to hex, then converts it to its, then cuts the alphachannel
|
||||
return x.map(x=>($x).toHex()).map(x=>byte(parseHexInt(x)))[0 .. 2]
|
||||
|
||||
proc flatten(a : seq[seq[byte]]) : seq[byte] =
|
||||
for x in a:
|
||||
result.add(x)
|
||||
#out is in a seq[int] of rgba, so, this outs an array of them
|
||||
#decode, flatten, then tensor to get the data
|
||||
#back to the "original" form
|
||||
if pad == 0:
|
||||
return collect(for x in chunked(png.data,4): decode(x)).flatten().toTensor().reshape(size^2, 3)
|
||||
else:
|
||||
#it iterates to the end amount of files given in total
|
||||
#if false it'll only read 1.1
|
||||
proc decodeImage(png : PNGResult[string], final : var seq[byte]) =
|
||||
doAssert(png.height == size, "NON SQUARE SHAPE PADS NOT SUPPORTED")
|
||||
#IO operation to organize
|
||||
#one lines RGBA*vertical pad length, to get each b lock
|
||||
for bigchunk in chunked(png.data, (size*4)*(pad+1)):
|
||||
#organize this into horizontal length blocks
|
||||
#used as a vertical iterator
|
||||
let start = bigchunk.distribute(pad+1, spread=false)
|
||||
for x in countup(0, start[0].high, ((pad+1)*4)):
|
||||
for y in 0 .. start.high:
|
||||
#this seems inefficient, i should look at this again..
|
||||
#later.
|
||||
var a = start[y][x .. (((pad+1)*4)+x)-1]
|
||||
for delete in countup(3, a.high, 4): a.delete(delete-(int((delete+1)/4)-1))
|
||||
final.add(a.map(x=>x.byte))
|
||||
if multiple:
|
||||
for file in 1 .. total:
|
||||
let png = loadPNG32(input & $file & ".png")
|
||||
decodeImage(png, final)
|
||||
filesRead+=1
|
||||
else:
|
||||
decodeImage(png, final)
|
||||
filesRead = 1
|
||||
|
||||
|
||||
let mody = (((pad+1)^2)*3)
|
||||
let test = int(((size^2)*3)/mody)*(filesRead)
|
||||
return final.toTensor().reshape(test,mody)
|
||||
|
||||
iterator writeMultipleFiles(file : Stream, size, filesize, pad : int, outdir : string, reference : ref seq[byte]) : (proc(a: (byte,byte,byte)){.gcsafe.}, byte) {.gcsafe.} =
|
||||
let byteSize = (pad+1)^2
|
||||
let imagesNumber = (((filesize+256) * byteSize) / size^2).ceil().int
|
||||
var written = 0
|
||||
#threading this would be so simple, thats kinda the point of the design.
|
||||
#previous form was a nightmare
|
||||
while true:
|
||||
var cbyte : byte
|
||||
var starty = 0
|
||||
for imageIndex in 1 .. imagesNumber:
|
||||
var image = initPixels(size,size)
|
||||
var it = itPad(pad, size)
|
||||
|
||||
for current in it():
|
||||
if file.atEnd():
|
||||
break
|
||||
|
||||
if starty < 256:
|
||||
cbyte = byte(starty)
|
||||
starty.inc()
|
||||
else:
|
||||
cbyte = file.readChar.byte
|
||||
yield ((proc(input : (byte,byte,byte)) {.gcsafe.} =
|
||||
for x in current:
|
||||
image[x[0],x[1]].setColor(input[0],input[1],input[2], 255)
|
||||
reference[].add(input[0])
|
||||
reference[].add(input[1])
|
||||
reference[].add(input[2])
|
||||
),
|
||||
cbyte)
|
||||
simplePNG(outdir & "/in" & $written & ".png", image)
|
||||
written.inc()
|
||||
|
||||
break
|
||||
proc rowToArray(a : Tensor[byte]) : (byte,byte,byte) {.gcsafe.} =
  ## Unpacks the first row of a palette-row tensor into an (r, g, b) tuple.
  let r = a[0, 0]
  let g = a[0, 1]
  let b = a[0, 2]
  result = (r, g, b)
|
||||
|
||||
proc encodeImage*(pad = 0, path : int, inputFile : string | Stream, pltePath : string | Tensor[byte]) : (Tensor[byte], Tensor[byte]) {.gcsafe.} =
|
||||
var refout = new seq[byte]
|
||||
##not for release
|
||||
|
||||
#let rand = genColorMatrix()
|
||||
#temp bebug matrix
|
||||
var colorPallet : Tensor[byte]
|
||||
when pltePath is string:
|
||||
if pltePath == "":
|
||||
colorPallet = genColorMatrix()
|
||||
else:
|
||||
colorPallet = deSerializeColorPallate(newFileStream(pltePath))
|
||||
else:
|
||||
|
||||
colorPallet = pltePath
|
||||
|
||||
let size = 600
|
||||
|
||||
var file : Stream
|
||||
var fileSize : BiggestInt
|
||||
when inputFile is string:
|
||||
if inputFile == "" or inputFile == "-":
|
||||
while sizeof(stdin) == 0:
|
||||
continue
|
||||
file = newStringStream(newFileStream(stdin).readAll())
|
||||
#dirty code, I don't know else how to get the proper size of stdin
|
||||
fileSize = file.readAll().len()
|
||||
file.setPosition(0)
|
||||
else:
|
||||
file = (newFileStream(inputFile))
|
||||
fileSize = getFileSize(inputFile)
|
||||
else:
|
||||
file = inputFile
|
||||
|
||||
for buff in writeMultipleFiles(file, size, fileSize.int, pad, "in/" & $path, refout):
|
||||
#wMF yeilds a function to apply a color to a block, and a byte of data
|
||||
#this system allows for a greator flexabiliy of color choices and
|
||||
#potential future complexity if needed
|
||||
let row = rowToArray(colorPallet[buff[1].int, 0 .. 2])
|
||||
buff[0](row)
|
||||
|
||||
|
||||
let mody = (((pad+1)^2)*3)
|
||||
return (colorPallet, refout[].toTensor().reshape(int(refout[].len()/mody) , mody))
|
||||
|
||||
# proc decodeImagePerfect(pad : int, simple = true, path : string) {.gcsafe.} =
|
||||
#to be programmed
|
||||
|
||||
proc outMatrix(pad : int, simple = true, path : int, multiple = true) : Tensor[byte]{.gcsafe.} =
  ## Thin wrapper around fromPng: counts the decoded frames under
  ## out/<path>/ and reads them all back into a block tensor.
  ## NOTE(review): `simple` is accepted but never read — kept for
  ## interface compatibility.
  let dir = "out/" & $path
  var frameCount = 0
  for _ in os.walkDir(dir):
    frameCount.inc
  result = fromPng(dir & "/out", pad, frameCount, multiple)
|
||||
|
||||
|
||||
type
|
||||
train* = ref object of RootObj
|
||||
pre* : (seq[uint8], Metadata)
|
||||
post* : (seq[uint8], Metadata)
|
||||
colors* : (seq[uint8], Metadata)
|
||||
|
||||
|
||||
proc convertFrames*(input : (int, string, string | Stream, bool, string)) {.gcsafe, thread.}=
|
||||
## the main CL convert proc, everything put together
|
||||
## int is the outdir in 'in/x' and 'out/x'
|
||||
## string[0] is the name of the outdir, doesn't matter if the bool is falsy
|
||||
## string[1] is a stream containing encoding data or a string to it
|
||||
## bool is writey, false if you don't wise to write the output in the cerial format
|
||||
## string[2] is the path of a plte file, if you want it, keep blank otherwise
|
||||
let
|
||||
doWrite = input[3]
|
||||
inputFile = input[2]
|
||||
path = input[0]
|
||||
pltePath = input[4]
|
||||
|
||||
let t1 = cpuTime()
|
||||
let file = "in/" & $input[0]
|
||||
if fileExists(file):
|
||||
removeDir(file)
|
||||
discard existsOrCreateDir(file)
|
||||
let one = encodeImage(9, path, inputFile, pltepath)
|
||||
|
||||
discard os.execShellCmd("./tovideo.sh " & $input[0])
|
||||
|
||||
let outy = outMatrix(9, true, 0)
|
||||
|
||||
if doWrite:
|
||||
var outsize = [
|
||||
one[0].shape, one[1].shape, outy.shape
|
||||
]
|
||||
|
||||
var writtey = [toFlatSeq(one[0]), toFlatSeq(one[1]), toFlatSeq(outy)]
|
||||
var outputFile : FileStream
|
||||
|
||||
stderr.writeline input
|
||||
|
||||
if input[1] == "":
|
||||
outputFile = newFileStream(stdout)
|
||||
else:
|
||||
stderr.writeline input
|
||||
stderr.writeline "?"
|
||||
outputFile = newFileStream(input[1], fmWrite)
|
||||
|
||||
serializeTensor(outputFile, outsize, writtey)
|
||||
|
||||
var params {.global.} = commandLineParams()
|
||||
|
||||
proc printHelp() =
  ## Prints CLI usage to stderr and terminates the process with exit
  ## status 1.
  const usage = [
    "This program encodes data!",
    "use -h to display this!",
    "-e to encode a file:",
    "    cl -e [filetoencode] [tensorOutPath.bin] [pallet.plte]",
    "    to use in pipe mode: cl -e - -",
    "    to note release tensors: cl [input] x",
    "-rf to encode a file forever, generating new pallets",
    "    -cl -rf [file]"
  ]
  for line in usage:
    stderr.writeLine line
  quit(1)
|
||||
when isMainModule:
|
||||
var channel = createShared(Channel[train], sizeof(Channel[train]))
|
||||
channel[].open()
|
||||
var files = (collect(for x in os.walkdir("./trainingdata"): x).len())
|
||||
params.setLen(4)
|
||||
|
||||
if params[0] == "-e":
|
||||
|
||||
let fileToEncode = params[1]
|
||||
let tensorOutPath = params[2]
|
||||
let pltePath = params[3]
|
||||
let doWrite = tensorOutPath != "x"
|
||||
if params[1] == "" and params[2] == "":
|
||||
printHelp()
|
||||
convertFrames((0, tensorOutPath, fileToEncode, doWrite, pltePath))
|
||||
elif params[0] == "-rf":
|
||||
let fileToEncode = params[1]
|
||||
if fileToEncode == "":
|
||||
printHelp()
|
||||
var cores = countProcessors()
|
||||
var trainThreads = newSeq[Thread[(int, string, string, bool, string)]](cores)
|
||||
while true:
|
||||
for x in 0 .. cores:
|
||||
createThread(trainThreads[x], convertFrames, (x, &"trainingdata/{files}.bin", fileToEncode, true, ""))
|
||||
files+=1
|
||||
joinThreads(trainThreads)
|
||||
elif params[0] ==
|
||||
"-h":
|
||||
printHelp()
|
1
cl.nims
Normal file
1
cl.nims
Normal file
|
@ -0,0 +1 @@
|
|||
--threads:on
|
1
compile.sh
Executable file
1
compile.sh
Executable file
|
@ -0,0 +1 @@
|
|||
nim c -d:danger -d:release -d:blas=cblas $1
|
20
encodeLDPC.sh
Normal file
20
encodeLDPC.sh
Normal file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/sh

set -e # Stop if an error occurs

# Build the LDPC parity-check and generator matrices.
./ldpc/make-ldpc ./ldpc/ex-ldpcvar-5000a.pchk 5000 10000 2 evenboth 3 no4cycle
./ldpc/make-gen ./ldpc/ex-ldpcvar-5000a.pchk ./ldpc/ex-ldpcvar-5000a.gen dense

# Encode $1: expand to binary text, LDPC-encode, strip newlines, re-pack,
# then render the result to video frames.
# Fix: the palette committed to this repo is named bestpallet.plte; the
# previous misspelling "bestpallete.plte" pointed at a nonexistent file.
./tensorCeral -b $1 \
| ./ldpc/encode ./ldpc/ex-ldpcvar-5000a.pchk ./ldpc/ex-ldpcvar-5000a.gen - - \
| tr --delete '\n' \
| ./tensorCeral -e - - \
| ./cl -e - ./temp.bin bestpallet.plte

# Decode the frames back and compare the clone against the original input.
./implementation ./temp.bin \
| ./tensorCeral -b \
| ./ldpc/extract ./ldpc/ex-ldpcvar-5000a.gen - - \
| tr --delete '\n' \
| ./tensorCeral -e > out.clone
./tensorCeral -d $1 out.clone
rm temp.bin
|
304
implementation.nim
Normal file
304
implementation.nim
Normal file
|
@ -0,0 +1,304 @@
|
|||
import strformat
|
||||
import tensorCeral
|
||||
import streams
|
||||
import arraymancer
|
||||
import std/stats
|
||||
import math
|
||||
import sequtils
|
||||
import sugar
|
||||
import os
|
||||
import system
|
||||
import algorithm
|
||||
import terminal
|
||||
import illwill
|
||||
import nimpy
|
||||
import tables
|
||||
|
||||
|
||||
|
||||
illwillInit(fullscreen = false)
|
||||
|
||||
type data = ref object of RootObj
|
||||
originalColor : int
|
||||
inputdata : int
|
||||
outputChoices : seq[(int,int)]
|
||||
distanceFromCorrct : int
|
||||
stats : RunningStat
|
||||
|
||||
proc isCorrect(a : data) : bool =
  ## True when the top-ranked decoded choice matches the original color.
  a.outputChoices[0][1] == a.originalColor
|
||||
|
||||
type commands = enum
|
||||
Forward, ForwardGood, ForwardBad, BackwardGood, BackwardBad, Backward, Break, Graph
|
||||
|
||||
|
||||
proc getInput() : commands =
|
||||
##input for the statistic Viewer
|
||||
while true:
|
||||
var key = getKey()
|
||||
case key
|
||||
of Key.None: discard
|
||||
of Key.Escape: quit()
|
||||
of Key.Enter:
|
||||
return Forward
|
||||
of Key.E:
|
||||
echo "e"
|
||||
return ForwardBad
|
||||
of Key.Q:
|
||||
return BackwardBad
|
||||
of Key.A:
|
||||
return BackwardGood
|
||||
of Key.D:
|
||||
return ForwardGood
|
||||
of Key.Space:
|
||||
return Backward
|
||||
of Key.X:
|
||||
return Break
|
||||
of Key.W:
|
||||
return Graph
|
||||
else:
|
||||
continue
|
||||
|
||||
func datainit(a : int, c : int, stats : RunningStat, output : seq[(int,int)]) : data =
|
||||
var correct = 0
|
||||
for x in output:
|
||||
if x[1] == a:
|
||||
break
|
||||
correct+=1
|
||||
break
|
||||
result = data(
|
||||
originalColor : a,
|
||||
inputdata : c,
|
||||
outputChoices : output,
|
||||
distanceFromCorrct : correct,
|
||||
stats : stats
|
||||
)
|
||||
proc printData(data : data, cerial : (Tensor[float32], Tensor[float32], Tensor[float32])) =
|
||||
##prints Data for the statistics viewer
|
||||
eraseScreen()
|
||||
echo "#"
|
||||
echo &"The ordered Choices {data.outputChoices}"
|
||||
echo ""
|
||||
echo &"Correct Byte {data.originalColor}"
|
||||
echo ""
|
||||
echo &"Byte Returned {data.outputChoices[0][1]}"
|
||||
echo ""
|
||||
echo &"input data {cerial[2][data.inputdata, _]}"
|
||||
echo ""
|
||||
echo &"correct reference color {cerial[0][data.originalColor, _]}"
|
||||
echo ""
|
||||
echo &"output's reference color {cerial[0][data.outputChoices[0][1], _]}"
|
||||
echo ""
|
||||
echo &"idk: {data.stats}"
|
||||
echo data.originalColor
|
||||
echo data.outputChoices[0][1]
|
||||
echo "#"
|
||||
|
||||
var plt = pyImport("matplotlib.pyplot")
|
||||
|
||||
|
||||
proc getMeanValues(a : seq[float32]) : seq[int]=
  ## Computes per-channel statistics for an interleaved RGB sample seq:
  ## elements 0,3,6,... feed channel 1; 1,4,7,... channel 2;
  ## 2,5,8,... channel 3.
  ## Returns [mean1, mean2, mean3, kurtosis1, kurtosis2, kurtosis3],
  ## each truncated to int.
  ##
  ## Fix: channel 3 previously used `kurtosisS` (the sample estimator,
  ## which divides by (n-2)(n-3) and blows up for small n) while
  ## channels 1 and 2 used the population `kurtosis`; all three channels
  ## now use the population estimator consistently.
  var chan : array[3, RunningStat]
  for i, v in a:
    chan[i mod 3].push v
  result = [chan[0].mean, chan[1].mean, chan[2].mean,
            chan[0].kurtosis, chan[1].kurtosis, chan[2].kurtosis].map(x => x.int)
|
||||
proc getByte(input : seq[float32], reference : seq[seq[int]]) : (char, seq[(int,int)], seq[int]) =
  ## Picks the most likely byte for one block of pixel data by finding
  ## the reference color whose stats have the smallest squared distance
  ## to this block's stats.
  ## Returns (best byte, every (distance, byte) pair for debugging, the
  ## block's channel statistics).
  let stats = getMeanValues(input)
  var bestDist = 10000
  var bestByte = -1
  var distances : seq[(int, int)]
  for idx, refColor in reference:
    # abs() before squaring is redundant — squaring already discards sign.
    let d = (stats[0] - refColor[0])^2 +
            (stats[1] - refColor[1])^2 +
            (stats[2] - refColor[2])^2
    if d < bestDist:
      bestDist = d
      bestByte = idx
    distances.add((d, idx))
  result = (char(bestByte), distances, stats)
|
||||
proc correlateBadBytes(a : seq[ptr seq[(int,int)]]) : Table[int, Table[int,int]] =
  ## Counts, for every mis-decoded block, how often each runner-up byte
  ## (the second-ranked choice) co-occurs with the byte that was chosen:
  ## result[chosen][runnerUp] = number of occurrences.
  ## Each element of `a` points at an ordered (distance, byte) choice
  ## list; [0] is the winning choice and [1] the runner-up.
  for choices in a:
    let chosen = choices[][0][1]
    let runnerUp = choices[][1][1]
    # mgetOrPut collapses the hasKey / insert / update dance into a
    # single lookup per level.
    result.mgetOrPut(chosen, initTable[int, int]()).mgetOrPut(runnerUp, 0).inc
|
||||
|
||||
proc getGraphData(a: Table[int, Table[int,int]]) : (seq[int], seq[int], seq[int]) =
  ## Flattens the nested count table into three parallel seqs suitable
  ## for a scatter plot: (outer keys, inner keys, counts).
  for outerKey, inner in a.pairs:
    for innerKey, count in inner.pairs:
      result[0].add outerKey
      result[1].add innerKey
      result[2].add count
|
||||
|
||||
|
||||
proc getBestBytes*(inputstream : Stream, output : Stream) : ((Tensor[float32],Tensor[float32],Tensor[float32]), seq[seq[int]]) =
|
||||
##Takes a cerial inputstream, and a stream to output to
|
||||
##Gets the most likely choice from the given data.
|
||||
##Curently very simple, can get sub 3% correctness if lucky
|
||||
|
||||
stderr.writeLine("enter!")
|
||||
let cerial = deSerializeTensors(inputstream)
|
||||
var reference : seq[seq[int]]
|
||||
for x in 0 .. 255:
|
||||
let seqn = cerial[2][x, _].toFlatSeq()
|
||||
reference.add getMeanValues(seqn)
|
||||
|
||||
let max = cerial[2].shape[0]-1
|
||||
for x in 256 .. max:
|
||||
if cerial[2][x, 0 .. 2].toFlatSeq().foldl((a+b)) < 10:
|
||||
break
|
||||
var chary = getByte((cerial[2][x, _].toFlatSeq()), reference)
|
||||
chary[1].sort((a,b)=>cmp(a[0], b[0]))
|
||||
output.write chary[0]
|
||||
return (cerial, reference)
|
||||
|
||||
|
||||
when isMainModule:
|
||||
stderr.writeLine("enter!")
|
||||
var params = commandLineParams()
|
||||
echo params
|
||||
|
||||
var input = newFileStream(params[0])
|
||||
|
||||
var output : FileStream
|
||||
if ["", "-"].contains(params[1]):
|
||||
output = newFileStream(stdout)
|
||||
else:
|
||||
output = newFileStream(params[1], fmReadWrite)
|
||||
|
||||
var (cerial, reference) = getBestBytes(input, output)
|
||||
|
||||
if defined(colordebug):
|
||||
echo params[2]
|
||||
var old = newFileStream(params[2], fmRead)
|
||||
var outputDebugData : seq[data]
|
||||
let max = cerial[2].shape[0]-1
|
||||
|
||||
output.setPosition(0)
|
||||
for x in 256 .. max:
|
||||
if cerial[2][x, 0 .. 2].toFlatSeq().foldl((a+b)) < 10:
|
||||
break
|
||||
var chary = getByte((cerial[2][x, _].toFlatSeq()), reference)
|
||||
let originalByte = byte(old.readChar())
|
||||
|
||||
chary[1].sort((a,b)=>cmp(a[0], b[0]))
|
||||
var stats : RunningStat
|
||||
for x in chary[1]:
|
||||
stats.push x[0]
|
||||
outputDebugData.add datainit(originalByte.int, x, stats, chary[1])
|
||||
|
||||
old.setPosition(0)
|
||||
var dif = 0
|
||||
var total = 0
|
||||
var incorrect : seq[ptr seq[(int,int)]]
|
||||
|
||||
while not old.atEnd:
|
||||
let oldint = old.readChar().int
|
||||
let newint = output.readChar().int
|
||||
if newint != oldint:
|
||||
dif+=1
|
||||
incorrect.add(addr outputDebugData[total].outputChoices)
|
||||
total += 1
|
||||
|
||||
if defined(graph):
|
||||
|
||||
var dirtyIncorrect : seq[ptr seq[(int,int)]]
|
||||
var dirtyTemp : seq[seq[(int,int)]]
|
||||
|
||||
proc getAccuracyDecay(a : seq[(int,int)]) : float =
|
||||
return collect(for y in 0 .. 2: (a[y+1][0] / a[y][0])*100).foldl(a+b)/3.float
|
||||
|
||||
for x in outputDebugData:
|
||||
let output = getAccuracyDecay(x.outputChoices)
|
||||
if output < 300:
|
||||
dirtyIncorrect.add(addr x.outputChoices)
|
||||
if x.outputChoices[0][1] == 202 and x.outputChoices[1][1] == 173:
|
||||
dirtyTemp.add(x.outputChoices)
|
||||
|
||||
var dirtyGraph = getGraphData(correlateBadBytes(dirtyIncorrect))
|
||||
let fig = plt.subplots(1,2,figsize=(1,2))[1]
|
||||
var pure = correlateBadBytes(incorrect)
|
||||
var graph = getGraphData(pure)
|
||||
let totalErrors = graph[2].foldl((a+b))
|
||||
let newErrors = dirtyGraph[2].foldl((a+b))
|
||||
|
||||
discard fig[0].set_ylabel("Original Byte")
|
||||
discard fig[0].set_xlabel("Mistaken Byte")
|
||||
discard fig[0].legend(loc="upper left")
|
||||
discard fig[1].set_ylabel("Original Byte")
|
||||
discard fig[1].set_xlabel("Mistaken Byte")
|
||||
discard fig[1].legend(loc="upper left")
|
||||
discard fig[0].set_title(&"Before statistical isolation of errors:\n {totalErrors}")
|
||||
discard fig[1].set_title(&"After statistical isolation of errors: \n {newErrors} \n {(newERRORS / totalErrors)* 100}% increase")
|
||||
|
||||
discard fig[0].scatter(graph[0], graph[1], graph[2])
|
||||
discard fig[1].scatter(dirtyGraph[0], dirtyGraph[1], dirtyGraph[2])
|
||||
|
||||
discard plt.show()
|
||||
|
||||
let purey = outputDebugData.filter(x=>x.outputChoices[0][1] == 202 and x.outputChoices[1][1] == 173 and x.originalColor != 202).map(x=>x.outputChoices)
|
||||
echo purey.high
|
||||
echo dirtyTemp.high
|
||||
var stat1 : RunningStat
|
||||
for x in purey:
|
||||
stat1.push getAccuracyDecay(x)
|
||||
echo stat1
|
||||
stat1.clear()
|
||||
|
||||
for x in dirtyTemp:
|
||||
stat1.push getAccuracyDecay(x)
|
||||
dirtyTemp = dirtyTemp.filter(x=>purey.contains(x))
|
||||
echo stat1
|
||||
stat1.clear()
|
||||
|
||||
for x in dirtyTemp:
|
||||
stat1.push getAccuracyDecay(x)
|
||||
echo stat1
|
||||
|
||||
var print = true
|
||||
var pos = 0
|
||||
while defined(pager):
|
||||
if print:
|
||||
printData(outputDebugData[pos], cerial)
|
||||
else:
|
||||
print = true
|
||||
let command = getInput()
|
||||
case command:
|
||||
of Break: break
|
||||
of Forward:
|
||||
if outputDebugData.high > pos:
|
||||
pos+=1
|
||||
else:
|
||||
continue
|
||||
of Backward:
|
||||
if pos > 0:
|
||||
pos-=1
|
||||
else:
|
||||
continue
|
||||
of ForwardBad:
|
||||
for x in pos+1 .. outputDebugData.high:
|
||||
if outputDebugData[x].originalColor != outputDebugData[x].outputChoices[0][1]:
|
||||
pos = x
|
||||
break
|
||||
of Graph:
|
||||
var data = outputDebugData[pos].outputChoices.map(x=>x[0])[0 .. 100]
|
||||
var x = collect(for x in 0 .. 100: x)
|
||||
discard plt.scatter(x,data)
|
||||
discard plt.show()
|
||||
discard plt.clf()
|
||||
|
||||
else:
|
||||
break
|
108
install.sh
Executable file
108
install.sh
Executable file
|
@ -0,0 +1,108 @@
|
|||
#!/bin/sh
|
||||
function getManager {
  # Detects which supported package manager is on PATH, checking in the
  # same priority order as before (pacman, apt, yum); prints its name,
  # or "none" when no supported manager is found.
  for mgr in pacman apt yum; do
    if command -v "$mgr" >/dev/null; then
      echo "$mgr"
      return
    fi
  done
  echo "none"
}
|
||||
|
||||
|
||||
function hasCommand {
  # Checks whether command $1 exists. Returns 0 if it does.
  # Otherwise prompts the user with $2; returns 1 when the user agrees
  # to install it, and exits the whole script when they decline.
  if ! command -v $1 >/dev/null;
  then
    read -s -p $"$2 [N/y]" response
    case "$response" in
      [yY])
        return 1
        ;;
      *)
        # Fix: "proceede" -> "proceed" in the user-facing message.
        echo $"cannot proceed without $1"
        exit 1
        ;;
    esac
  fi
  return 0
}
|
||||
|
||||
packageManager=$(getManager)
|
||||
if ! hasCommand "nimble" "Nimble was not found on this computer, would you like to install nim from Choosenim?"
|
||||
then
|
||||
echo "Installing choosenim via curl + bash"
|
||||
curl https://nim-lang.org/choosenim/init.sh -sSf | sh
|
||||
fi
|
||||
|
||||
if ! hasCommand "ffmpeg" "FFMPEG not found, would you like to try and install it?"; then
|
||||
case $packageManager in
|
||||
pacman)
|
||||
echo 'install ffmpeg via pacman core repo'
|
||||
sudo pacman -S ffmpeg
|
||||
;;
|
||||
yum)
|
||||
echo "FFMPEG is not avalible in the yum core repo"
|
||||
echo 'sudo yum install epel-release'
|
||||
echo 'sudo yum localinstall --nogpgcheck https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm'
|
||||
echo 'sudo yum install ffmpeg ffmpeg-devel'
|
||||
read -s -p $'are you ok with the following install commands? [N/y]\n' response
|
||||
case "$response" in
|
||||
[yY])
|
||||
sudo yum install epel-release
|
||||
sudo yum localinstall --nogpgcheck https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm
|
||||
sudo yum install ffmpeg ffmpeg-devel
|
||||
;;
|
||||
*)
|
||||
echo "cannot proceede without ffmpeg"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
apt)
|
||||
echo 'install ffmpeg via apt core repo'
|
||||
sudo apt update
|
||||
sudo apt install ffmpeg
|
||||
;;
|
||||
none)
|
||||
echo "'other' package manager. Please install it yourself before continuing!"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if ! hasCommand "pip" "pip not found, would you like to try and install it?"; then
|
||||
case $packageManager in
|
||||
pacman)
|
||||
sudo pacman -S python
|
||||
;;
|
||||
yum)
|
||||
sudo yum install -y python3
|
||||
;;
|
||||
apt)
|
||||
sudo apt install python
|
||||
;;
|
||||
none)
|
||||
echo "please install python on your own"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
if ! python -c 'import pkgutil; print(1 if pkgutil.find_loader("matplotlib") else 0)' > /dev/null; then
|
||||
echo "installing matplotlib!"
|
||||
pip3 install "matplotlib"
|
||||
fi
|
||||
if ! [ -d "ldpc" ]; then
|
||||
mkdir ldpc
|
||||
echo "Installing LDPC"
|
||||
git clone https://github.com/radfordneal/LDPC-codes
|
||||
cd LDPC-codes
|
||||
make
|
||||
./LDPC-install ../ldpc/
|
||||
cd ..
|
||||
rm -rf LDPC-codes
|
||||
fi
|
||||
|
||||
nimble install nimpy simplepng nimpng 'arraymancer@#head' itertools -n illwill > /dev/null
|
154
tensorCeral.nim
Normal file
154
tensorCeral.nim
Normal file
|
@ -0,0 +1,154 @@
|
|||
import strutils
|
||||
import sugar
|
||||
import sequtils
|
||||
import arraymancer
|
||||
import streams
|
||||
import os
|
||||
|
||||
# CERIALIZATION OPERATIONS
|
||||
proc serializeTensor*(outy : Stream, dim : array[3, Metadata], data : array[3, seq[byte]], close = true) =
|
||||
## Takes in a Tensor made of pallete, originalFileColorEncoding, outputData
|
||||
## This is for training purposes, and is useful for machine learning
|
||||
|
||||
for x in 0 .. 2:
|
||||
outy.write(dim[x][0].uint32)
|
||||
outy.write(dim[x][1].uint32)
|
||||
outy.write(len(data[x]).uint32)
|
||||
for x in 0 .. 2:
|
||||
for x in data[x]:
|
||||
outy.write(x.char)
|
||||
outy.flush()
|
||||
if not close:
|
||||
outy.setPosition(0)
|
||||
else:
|
||||
outy.close()
|
||||
|
||||
proc serializeColorPallete*(a : Tensor[byte] | Tensor[float32], output : Stream) =
|
||||
##takes a pallete, makes it into a little file for your transport
|
||||
doAssert(a.shape == [256, 3], "must be a valid shape for a pallete")
|
||||
for x in a:
|
||||
when a is Tensor[float32]:
|
||||
output.write(x.byte)
|
||||
else:
|
||||
output.write(x)
|
||||
output.flush()
|
||||
|
||||
proc deSerializeColorPallate*(inny : Stream) : Tensor[byte] =
  ## Inverse of serializeColorPallete: reads 256*3 raw bytes from the
  ## stream and reshapes them into a 256x3 palette tensor.
  const palleteBytes = 256 * 3
  var raw = newSeq[byte](palleteBytes)
  discard inny.readData(raw[0].addr, raw.len)
  result = raw.toTensor().reshape(256, 3)
|
||||
|
||||
proc deSerializeTensors*(outy : Stream) : (Tensor[float32], Tensor[float32], Tensor[float32]) =
  ## Inverse of `serializeTensor`: reads the three headers and payloads from
  ## the start of the stream and rebuilds (palette, originalFileColorEncoding,
  ## outputData) as float32 tensors. Used for machine learning training;
  ## handles files of arbitrary size.
  var id : seq[(uint32, uint32, uint32)]
  outy.setPosition(0)
  # read the three [dim0, dim1, byteLength] headers (uint32 each)
  for _ in 1 .. 3:
    var fields : seq[uint32]
    var word : uint32        # previously a 4-element seq was allocated for 1 word
    for _ in 1 .. 3:
      discard outy.readData(word.addr, sizeof(uint32))
      fields.add(word)
    id.add((fields[0], fields[1], fields[2]))
  for i in 0 .. 2:
    let byteLen = id[i][2].int
    var payload = newSeq[byte](byteLen)
    # NOTE(review): a single huge read may fail for very large payloads;
    # chunked reads (e.g. tenths) would be safer - TODO confirm
    discard outy.readData(payload[0].addr, byteLen)

    let tensor = payload.map(x => float32(x)).toTensor.reshape(id[i][0].int, id[i][1].int)
    case i
    of 0: result[0] = tensor
    of 1: result[1] = tensor
    of 2: result[2] = tensor
    else: discard  # unreachable: i ranges over 0 .. 2
|
||||
|
||||
template arrangeIo(a, b : string) =
  ## Injects `input`/`output` FileStreams into the calling scope.
  ## An empty string or "-" selects stdin/stdout respectively, which keeps
  ## the binary-conversion procs pipe-friendly.
  var input {.inject.} : FileStream
  var output {.inject.} : FileStream
  if a == "" or a == "-":
    input = newFileStream(stdin)
  else:
    input = newFileStream(a, fmRead)
  if b == "" or b == "-":
    output = newFileStream(stdout)
  else:
    output = newFileStream(b, fmWrite)
|
||||
|
||||
proc convertFileToBinary*(a, b = "") =
  ## Expands every input byte into its 8-character "0"/"1" text form.
  ## Used by the LDPC shell script, which mandates textual binary i/o.
  ## Empty/"-" arguments mean stdin/stdout (see `arrangeIo`).
  arrangeIo(a, b)

  while not input.atEnd():
    output.write(toBin(int(input.readChar()), 8))

  output.flush()
|
||||
|
||||
proc convertFileToHex*(a, b = "") =
  ## Inverse of `convertFileToBinary`: packs each run of 8 "0"/"1"
  ## characters back into one output byte. Used by the LDPC shell script.
  ## Empty/"-" arguments mean stdin/stdout (see `arrangeIo`).
  arrangeIo(a, b)
  var buffer = newSeq[char](8)

  while not input.atEnd():
    let got = input.readData(buffer[0].addr, 8)
    if got < 8:
      # trailing partial group: previously the stale tail of `buffer` from the
      # prior iteration was parsed, emitting a garbage byte - drop it instead
      break
    let converted = char parseBinInt(buffer.join(""))
    output.write(converted)

  output.flush()
|
||||
|
||||
|
||||
|
||||
proc diffCheck*(a : Stream, b : Stream) =
  ##diffchecks the streams
  ##used at the end to compare files
  ##
  ## Slides `b` over byte offsets 0..40 relative to `a` and reports (to
  ## stderr) each new best mismatch percentage found, to locate the
  ## alignment with the least transmission corruption.
  ## NOTE(review): `incorrect`/`total` are never reset between offsets, so
  ## the percentage is cumulative across all offsets tried - confirm this
  ## is the intended metric.
  var incorrect = 0
  var total = 0
  var best = 100.0

  for x in 0 .. 40 :
    # start comparing `a` from its beginning against `b` shifted by x bytes
    b.setPosition(x)
    a.setPosition(0)
    while not a.atEnd():
      total+=1
      if a.readChar() != b.readChar():
        incorrect += 1
    # report only improvements over the best percentage seen so far
    if best > (incorrect / total ) * 100:
      best = (incorrect / total ) * 100
      stderr.writeLine best
|
||||
|
||||
|
||||
when isMainModule:
  ## CLI entry: -b = bytes -> textual binary, -e = textual binary -> bytes,
  ## -d = diff-check two files.
  var params = commandLineParams()
  var command = ""
  if params.len() == 0:
    quit(1)
  else:
    command = params[0]
  # pad to 3 entries so params[1]/params[2] always exist
  # ("" falls through to stdin/stdout in arrangeIo)
  params.setlen(3)

  # consistency fix: dispatch on `command` everywhere (the -b/-e branches
  # previously re-read params[0] while -d used `command`)
  if command == "-b":
    convertFileToBinary(params[1], params[2])
  elif command == "-e":
    convertFileToHex(params[1], params[2])
  elif command == "-d":
    diffCheck(newFileStream(params[1]), newFileStream(params[2]))
|
6
testCerials.sh
Executable file
6
testCerials.sh
Executable file
|
@ -0,0 +1,6 @@
|
|||
#!/bin/bash
# Round-trip check: decode every training tensor dump with ./implementation,
# then diff the clone against the original via ./tensorCeral -d.
# $1: the original input file to compare against.
for filename in trainingdata/*.bin; do
    # quote all expansions so paths with spaces survive word splitting
    echo "$filename"
    ./implementation "$filename" out.clone "$1"
    ./tensorCeral -d "$1" out.clone
done
|
11
tovideo.sh
Executable file
11
tovideo.sh
Executable file
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
# Encodes the PNG frames under in/$1/ into an h264 video, then decodes that
# video back into PNG frames under out/$1/ (simulating upload re-encoding).
if ! [ -d "outvideos/" ]; then
    mkdir outvideos
fi
if ! [ -d "out/" ]; then
    # bug fix: this branch previously re-created outvideos instead of out
    mkdir out
fi
echo "owo"
# note: `&>` is a bashism that under /bin/sh backgrounds the command;
# use the POSIX redirection form instead
ffmpeg -r 60 -f image2 -s 600x600 -i "in/$1/in%0d.png" -vcodec libx264 -profile:v high -bf 2 -g 30 -crf 18 -pix_fmt yuv420p -y "outvideos/$1temp.mp4" > /dev/null 2>&1
mkdir -p "out/$1/"
ffmpeg -i "outvideos/$1temp.mp4" -vf fps=60/1 -y "out/$1/out%0d.png" > /dev/null 2>&1
|
209
training.nim
Normal file
209
training.nim
Normal file
|
@ -0,0 +1,209 @@
|
|||
import tensorCeral
|
||||
import arraymancer
|
||||
import os
|
||||
import streams
|
||||
import strformat
|
||||
import json
|
||||
import std.jsonutils
|
||||
import random
|
||||
import math
|
||||
import sequtils
|
||||
import tables
|
||||
import sugar
|
||||
import stats
|
||||
import nimpy
|
||||
|
||||
# This program handles the training and serialization of a machine learning
# algorithm. Due to current technical limitations around serialization, the
# model cannot be saved/loaded reliably. This code is currently not used.

# loss samples bucketed by the number of unique values in the input block;
# each entry is (loss value, training iteration)
var outdata = initTable[int, seq[(float32, int)]]()
for bucket in 0 .. 301:
  outdata[bucket] = @[]
randomize()
|
||||
proc getNumberOfFiles(path : string) : int =
  ## Counts the directory entries in `path`; used to pick a fresh index
  ## for the next stats file.
  # previously this echoed the full listing (debug leftover) and walked the
  # directory twice via collect(...).high+1
  for _ in os.walkDir(path):
    inc result
|
||||
|
||||
|
||||
proc getUnique(a : Tensor[float32]) : int =
  ## Counts the number of distinct values in `a`
  ## (used to bucket training losses by input variety).
  var seen : seq[float]
  for value in a:
    if value notin seen:
      seen.add(value)
  result = seen.len()
|
||||
# Small fully-connected net, 300 -> 42 -> 300 with a relu in between
# (arraymancer `network` DSL; 300 matches the serialized tensor row width).
network TwoLayersNet:
  layers:
    fc1: Linear(300, 42)
    fc2: Linear(42, 300)
  forward x:
    x.fc1.relu.fc2
|
||||
|
||||
proc save(network: TwoLayersNet[float32], outy : int) =
  ## Dumps every layer's weights and biases as .npy files under model/,
  ## with `outy` as the file-name tag.
  network.fc1.weight.value.write_npy(&"model/hiddenweight{outy}.npy")
  network.fc1.bias.value.write_npy(&"model/hiddenbias{outy}.npy")
  network.fc2.weight.value.write_npy(&"model/outputweight{outy}.npy")
  network.fc2.bias.value.write_npy(&"model/outputbias{outy}.npy")
|
||||
|
||||
proc load*(ctx: Context[Tensor[float32]], inny : int): TwoLayersNet[float32] =
  ## Rebuilds a TwoLayersNet from the .npy files written by `save`,
  ## using `inny` as the file-name tag.
  ## NOTE(review): `ctx` is accepted but never used in this body - confirm
  ## the restored variables are attached to an autograd context before
  ## resuming training (the README notes loading is buggy/unimplemented).
  result.fc1.weight.value = read_npy[float32](&"model/hiddenweight{inny}.npy")
  result.fc1.bias.value = read_npy[float32](&"model/hiddenbias{inny}.npy")
  result.fc2.weight.value = read_npy[float32](&"model/outputweight{inny}.npy")
  result.fc2.bias.value = read_npy[float32](&"model/outputbias{inny}.npy")
|
||||
|
||||
proc echoUsage() =
  ## Prints command-line usage to stdout.
  echo "This program requires stdinputs"
  echo " To train a model and save it:"
  echo " -t [tensor1.bin] [tensor2.bin] ..."
  # typo fix in user-facing text: "anaylize" -> "analyze"
  echo " To analyze its outputs:"
  echo " -s [stats1.json] [stats2.json] ..."
  echo "each program can take between 1 and an infinite number of inputs"
|
||||
|
||||
when isMainModule:
  ## CLI entry: -s plots saved loss statistics, -t trains on tensor dumps.
  var params = commandLineParams()
  # bug fix: a leftover `params.setlen(1)` here used to truncate away every
  # file argument, so both modes always bailed out with the usage message
  if params.len() == 0 or not ["-t","-s"].contains(params[0]):
    echoUsage()
    quit(1)
  if params[0] == "-s":
    # statistics segment: plot the loss data collected during training
    if params.len() == 1:
      echoUsage()
      quit(1)

    var plt = pyImport("matplotlib.pyplot")
    let fig = plt.subplots(1, len(params)-1)[1]
    var figit = 0

    for x in params[1 .. ^1]:
      var newplot : PyObject
      if len(params) == 2:
        # plt.subplots(1, 1) returns a single axes rather than an array
        newplot = fig
      else:
        newplot = fig[figit]
      var table : Table[int, seq[(float, int)]]
      fromJson(table, parseJson(readFile(x)))
      var decomp : seq[int]       # unique-value counts that actually occurred
      var means : seq[float]      # occurrence-normalized mean loss per count
      var full : RunningStat      # global stats over every retained loss
      var tempith : RunningStat   # stats over the occurrence counts themselves
      var rawTemptih : seq[int]
      var rawstats : seq[float]   # raw mean loss per count

      for x in 3 .. 301:
        if table[x].len() == 0:
          continue
        rawTemptih.add(table[x].len())
        tempith.push(table[x].len().float)

      for x in 3 .. 301:
        # 15000 is a magic number, but filters out early training abnormalities
        let temp = table[x].map(x=>x[0]).filter(x => x < 15000)

        if temp.len() == 0:
          continue
        else:
          var statistics: RunningStat # must be var
          statistics.push(temp)
          full.push(temp)
          decomp.add(x)
          rawstats.add(statistics.mean)

          # weight each mean by how often its unique-count occurred
          let percent = 1 - ((((tempith.max - temp.len().float ) + tempith.min) * (1 / tempith.max)))
          means.add(statistics.mean*percent)

      let fullMean = collect(for x in 0 .. decomp.high : full.mean)
      discard newplot.scatter(decomp, rawstats, label="Raw mean value at each occurance")
      discard newplot.scatter(decomp, means, label="Occurance normalized mean")
      discard newplot.plot(decomp, fullMean, label="Global mean")
      discard newplot.set_ylim(0, 15000)
      discard newplot.set_title(x[1])
      discard newplot.set_ylabel("loss")
      discard newplot.set_xlabel("Amount of different variables")
      discard newplot.legend(loc="upper left")
      figit+=1
    discard plt.show()
    quit()
  if params[0] == "-t":
    # training segment
    if params.len() == 1:
      echoUsage()
      quit(1)

    var
      ctx = newContext Tensor[float32]
      model = ctx.init(TwoLayersNet)
      optim = model.optimizerSGD(learning_rate = 1e-5'f32)

    # ring buffer of the 10 most recent losses, dumped on Ctrl-C
    var circular : seq[float32]
    proc addToCache(input : float32) =
      if circular.len() == 10:
        circular.delete(0)
      circular.add(input)

    proc writey(die = false) {.noconv.} =
      ## Saves the model; with `die` also dumps the collected loss statistics
      ## to a fresh json file under ./trainingstats/ and exits.
      model.save(0)
      if die:
        echo circular
        let outint = getNumberOfFiles("./trainingstats/")
        echo &"writingoutput to: ./trainingstats/stats{outint}.json"
        writeFile(&"./trainingstats/stats{outint}.json", $(outdata.toJson()))
        quit()

    proc exit() {.noconv.} =
      writey(true)

    setControlCHook(exit)

    var prev = -1.0'f32
    var it = 0

    var dicty = newTable[int, seq[(int, string)]]()
    for x in 0 .. 301:
      dicty[x] = @[]

    proc train() =
      # bug fix: iterate only the file arguments; previously the "-t" flag
      # itself was also opened as a tensor file
      for tensors in params[1 .. ^1]:
        var input = newFileStream(tensors)
        let decompressed = deSerializeTensors(input)
        let max = (decompressed[1].shape[0])-1
        for county in 0 .. max:
          let
            x1 = ctx.variable(decompressed[2][county .. county+1, _])
            y1 = decompressed[1][county .. county+1, _]
            unique = (x1.value).getUnique()
          #around half
          #if unique >= 110:
            #model = models[1]
          var strike = 0
          for t in 0 .. 50:
            var
              y_pred = model.forward(x1)
              loss = y_pred.mse_loss(y1)
            if t mod 10 == 0:
              echo loss.value[0]
            outdata[unique].add((loss.value[0], it))
            # bail out once the loss has been flat for 7 consecutive steps
            if loss.value[0] == prev:
              if strike == 7:
                break
              strike += 1

            if loss.value[0] != prev:
              if strike != 0:
                strike = 0
            loss.backprop()
            optim.update()
            prev = loss.value[0]
          # in case of crashing it writes saves every time
          echo "hmm"
          addToCache(prev)
          it+=1
          writey()

    train()
    writey()
|
62
unit-test.nim
Normal file
62
unit-test.nim
Normal file
|
@ -0,0 +1,62 @@
|
|||
import unittest
|
||||
import arraymancer
|
||||
import tensorCeral
|
||||
import cl
|
||||
import implementation
|
||||
import math
|
||||
import sugar
|
||||
import sequtils
|
||||
import streams
|
||||
import strutils
|
||||
import random
|
||||
import os
|
||||
randomize()
suite "encoding":
  echo "this is the encoding suite!"
  # NOTE(review): later tests reference lets declared inside "Decode Test"
  # (pallet, garbage, inputData, outputData, input) - the tests appear to be
  # order-dependent and scope-sharing; confirm this compiles/runs as intended.
  test "Decode Test":
    let resolution = 600
    let pad = 9
    # usable data bytes per frame: total blocks minus the 256 palette-key blocks
    let maxBytes = (resolution^2 / (pad+1)^2)-256
    doAssert maxBytes mod 1 == 0
    discard existsOrCreateDir("./in/")
    discard existsOrCreateDir("./in/0/")
    let pallet = genColorMatrix()
    # a random payload that fills exactly one frame
    let garbage = newStringStream(collect(for x in 1 .. maxBytes.int:
      (parseHexStr(toHex(rand(0 .. 255).byte)))).join(""))
    let inputData = encodeImage(9, 0, garbage, pallet)[1]
    let outputData = fromPng("in/0/in0.png" , 9, 0, false)
    check inputData == outputData
  test "tensorCerial":
    var input = newStringStream("")

    # round-trip: serialize with close=false so the stream rewinds for reading
    serializeTensor(input,
      [pallet.shape, inputData.shape, outputData.shape],
      [pallet.toFlatSeq(), inputData.toFlatSeq(), outputData.toFlatSeq()], false)

    let serializationTest = deSerializeTensors(input)

    # mirror the byte tensors as float32 to compare against the
    # deserialized (float32) tensors
    let toFloat = (
      pallet.toFlatSeq().map(x=>x.float32).toTensor().reshape(pallet.shape),
      inputData.toFlatSeq().map(x=>x.float32).toTensor().reshape(inputData.shape),
      outputdata.toFlatSeq().map(x=>x.float32).toTensor().reshape(outputData.shape)
    )

    check serializationTest == toFloat
    input.setPosition(0)
  test "implementation test":
    var implementOutput = newStringStream("")
    discard getBestBytes(input, implementOutput)
    implementOutput.setPosition(0)
    garbage.setPosition(0)

    check garbage.readAll() == implementOutput.readAll()
  test "convertFramesTest":
    garbage.setPosition(0)
    convertFrames((0, "outTemp.bin", garbage, true, ""))
    # honestly if this works, theres no more testing needed, the rest would be
    # implementation issues which are already checked....
    # I guess it could be in more depth but its really hard to at this stage
    discard deSerializeTensors(newFileStream("outTemp.bin"))[2]
    removeFile("outTemp.bin")
|
||||
|
||||
|
Loading…
Reference in a new issue