Zipfs-Law-Language-Detector/execenv/graph.py
2024-10-13 08:04:35 -04:00

114 lines
2.9 KiB
Python

import matplotlib.pyplot as plt
import json
import numpy as np
from numpy._core.arrayprint import format_float_scientific
import sys
def sortDict(x):
    """Return a new dict with the entries of ``x`` ordered by ascending value.

    Entries with equal values keep their original relative order because
    the underlying sort is stable.
    """
    ordered_pairs = sorted(x.items(), key=lambda pair: pair[1])
    return {key: value for key, value in ordered_pairs}
def collectAlphabet(json):
    """Flatten every value of the mapping ``json`` into a single list.

    Args:
        json: Mapping whose values are iterables of characters; the keys
            are ignored. NOTE: the parameter name shadows the ``json``
            module inside this function. It is kept unchanged so callers
            passing it by keyword keep working.

    Returns:
        A new list holding the elements of every value, in the mapping's
        iteration order. Duplicates are NOT removed: an element present in
        several values appears once per value.
    """
    return [char for letters in json.values() for char in letters]
def sumDict(dict):
    """Return the sum of all values stored in ``dict``.

    Args:
        dict: Mapping with numeric values. The parameter name shadows the
            built-in ``dict``; it is kept for backward compatibility.

    Returns:
        The sum of the values, or ``0`` for an empty mapping.
    """
    return sum(dict.values())
def createSlope(dict, chars):
    """Convert raw character counts into percentage shares.

    Args:
        dict: Mapping of character -> occurrence count. The parameter name
            shadows the built-in ``dict``; it is kept for backward
            compatibility.
        chars: Iterable of characters to report on. It may contain the
            same character more than once.

    Returns:
        A dict mapping each reported key to its share (in percent) of the
        total of ALL counts in ``dict``:

        * a lower-case character found in ``dict`` is reported under its
          upper-case form, and the counts of both case variants are
          combined under that key;
        * a character missing from ``dict`` is reported as ``0`` under its
          own spelling, unless a value already exists for that key;
        * shares below 0.1 percent are reported as ``0``;
        * when the total count is zero every share is ``0``.
    """
    total = sum(dict.values())

    counts = {}
    seen = set()
    for c in chars:
        # ``chars`` may list a character several times; counting it again
        # would inflate its share.
        if c in seen:
            continue
        seen.add(c)
        if c in dict:
            key = c.upper() if c.islower() else c
            counts[key] = counts.get(key, 0) + dict[c]
        else:
            # setdefault (not assignment): an absent case variant must not
            # wipe a count already recorded under the same key.
            counts.setdefault(c, 0)

    result = {}
    for key, count in counts.items():
        # Guard against ZeroDivisionError when every count is zero.
        percent = (count / total) * 100 if total else 0
        result[key] = 0 if 0.1 > percent else percent
    return result
def createChart(slope, chars):
    """Build a list of ``[character, value]`` rows in the order of ``chars``.

    Args:
        slope: Dict mapping characters to values.
        chars: Iterable of characters; one row is produced per element.

    Returns:
        A list of two-element lists ``[c, value]`` where ``value`` is
        ``slope[c]``, or ``0`` when ``c`` is not a key of ``slope``. Rows
        are lists (not tuples) so they can be modified by the caller.
    """
    return [[c, slope.get(c, 0)] for c in chars]
def createSlopeForString(str: str):
    """Compute the character-share table of a piece of text.

    Characters that are not in the module-level ``alphabet`` are skipped.
    Lower-case characters are counted under their upper-case form. The
    counts are then converted by ``createSlope`` using ``alphabet`` as the
    list of characters to report.

    Args:
        str: Text to analyse. The parameter name shadows the built-in
            ``str``; it is kept for backward compatibility.

    Returns:
        Whatever ``createSlope`` returns for the collected counts.

    Note:
        Relies on the module-level ``alphabet`` being defined before the
        function is called.
    """
    counts = {}
    for char in str:
        if char not in alphabet:
            continue
        key = char.upper() if char.islower() else char
        counts[key] = counts.get(key, 0) + 1
    return createSlope(counts, alphabet)
def reduceNoise(listOfCharts):
    """Drop, in place, every position whose value is 0 in all charts.

    Args:
        listOfCharts: List of charts; each chart is a list of
            ``[label, value]`` rows. The positions examined are those of
            the first chart, and every chart is expected to have a row at
            each of them.

    Returns:
        None. Each element of ``listOfCharts`` is replaced by a new,
        filtered list; the row objects themselves are left unmodified.
        An empty ``listOfCharts`` is left as is.
    """
    if not listOfCharts:
        # Nothing to compare; avoids IndexError on listOfCharts[0].
        return

    width = len(listOfCharts[0])
    # keep[i] is True when at least one chart has a non-zero value at i.
    keep = [
        any(chart[i][1] != 0 for chart in listOfCharts)
        for i in range(width)
    ]
    for n, chart in enumerate(listOfCharts):
        # Rows beyond the first chart's length were never examined, so
        # they are kept.
        listOfCharts[n] = [
            row for i, row in enumerate(chart) if i >= width or keep[i]
        ]
def neighborDistance(a, b):
    """Print and return the summed absolute difference of two charts.

    Rows of ``a`` and ``b`` are paired by position; pairing stops at the
    end of the shorter chart. Only the second element of each row (the
    value) takes part in the computation.

    Args:
        a: Sequence of rows whose element ``[1]`` is numeric.
        b: Sequence of rows whose element ``[1]`` is numeric.

    Returns:
        The sum of ``abs(a_row[1] - b_row[1])`` over all pairs, or ``0``
        when there are no pairs. The value is also printed, as before, so
        existing callers that rely on the output are unaffected.
    """
    distance = sum(abs(a1[1] - b1[1]) for a1, b1 in zip(a, b))
    print(distance)
    return distance
# Load the reference data. The paths are relative to the current working
# directory, so the script has to be started from a directory where
# ``../data`` resolves to the data folder.
# The files are read as UTF-8 (the JSON interchange encoding) instead of the
# platform default, and are closed as soon as they have been parsed.
with open('../data/mostCommonCharacters.json', encoding='utf-8') as file:
    result = json.load(file)
# Diagnostic output; raises KeyError if the data has no "sm" entry.
print(result["sm"])
with open('../data/alphabets.json', encoding='utf-8') as file:
    alphabet_row = json.load(file)

# Flat list of every character of every alphabet (duplicates included).
alphabet = collectAlphabet(alphabet_row)
# Index of each character in ``alphabet``; for a repeated character the
# last position wins.
alphabeticalNumbers = {char: i for i, char in enumerate(alphabet)}
array = np.empty(0, dtype=float)
characters = []

# Every command-line argument is used as a key into ``result``; an unknown
# key raises KeyError.
args = len(sys.argv)
langs = sys.argv[1:args]
charts = []
for lang in langs:
    slope = createSlope(result[lang], alphabet)
    chart = createChart(slope, alphabet)
    charts.append(chart)
def breakapart(a):
    """Split a chart into separate label and value lists.

    Args:
        a: Iterable of rows; element ``[0]`` of a row is its label and
            element ``[1]`` its value. It is traversed twice, so it must
            be re-iterable (e.g. a list).

    Returns:
        A tuple ``(labels, values)`` of two lists in row order; both are
        empty when ``a`` is empty.
    """
    labels = [row[0] for row in a]
    values = [row[1] for row in a]
    return (labels, values)
# ``denoised`` is another name for the same list object as ``charts``;
# reduceNoise filters that list in place.
denoised = charts
reduceNoise(charts)

# Draw one line per chart on the shared axes, then display the figure.
for c in charts:
    broken = breakapart(c)
    labels, values = broken
    plt.plot(labels, values)
plt.show()