# Python source file - 114 lines, 2.9 KiB
import matplotlib.pyplot as plt
|
|
import json
|
|
import numpy as np
|
|
from numpy._core.arrayprint import format_float_scientific
|
|
import sys
|
|
def sortDict(x):
    """Return a new dict holding the entries of ``x`` ordered by ascending value.

    The input mapping is not modified. Entries with equal values keep the
    relative order they had in ``x`` (``sorted`` is stable).
    """
    ordered_pairs = sorted(x.items(), key=lambda pair: pair[1])
    return {key: value for key, value in ordered_pairs}
|
|
def collectAlphabet(json):
    """Flatten all values of the mapping ``json`` into a single list.

    The values are concatenated in the mapping's iteration order; the keys
    are ignored. A new list is returned on every call.

    Note: the parameter name shadows the imported ``json`` module inside
    this function body.
    """
    flattened = []
    for group in json.values():
        flattened.extend(group)
    return flattened
|
|
def sumDict(dict):
    """Return the sum of all values stored in the mapping ``dict``.

    An empty mapping yields ``0``. The parameter name shadows the builtin
    ``dict`` inside this function body.
    """
    return sum(dict.values())
|
|
def createSlope(dict, chars):
    """Convert raw character counts into a percentage profile.

    Args:
        dict: mapping of character -> count. (The name shadows the builtin
            ``dict`` inside this function body.)
        chars: iterable of characters to report on.

    Returns:
        A new dict with one entry per character in ``chars``:

        * A character present in ``dict`` is stored under its upper-cased
          form (when it is lower case) with its share of the total count in
          percent. Shares below 0.1 % are stored as ``0``.
        * A character absent from ``dict`` is stored under its original,
          unmodified key with the value ``0``.

        Because keys are written in ``chars`` order, a later character that
        maps to the same key overwrites an earlier entry.
    """
    total = sum(dict.values())
    result = {}
    for c in chars:
        if c not in dict:
            # Absent characters keep their key exactly as given in ``chars``.
            result[c] = 0
            continue

        key = c.upper() if c.islower() else c
        # A total of 0 (e.g. every count is 0) would raise ZeroDivisionError;
        # report such characters as 0 % instead.
        percent = (dict[c] / total) * 100 if total else 0
        # Shares under 0.1 % are treated as noise and flattened to 0.
        result[key] = 0 if 0.1 > percent else percent
    return result
|
|
|
|
def createChart(slope, chars):
    """Turn a profile mapping into a list of ``[character, value]`` pairs.

    One pair is produced for every entry of ``chars``, in order. A character
    that has no entry in ``slope`` gets the value ``0``.
    """
    return [[symbol, slope.get(symbol, 0)] for symbol in chars]
|
|
|
|
def createSlopeForString(str: str):
    """Build a percentage profile for the characters of the text ``str``.

    Every character of ``str`` is printed to stdout. Characters that are not
    in the module-level ``alphabet`` are then skipped; the remaining ones are
    upper-cased when lower case and counted. The counts are passed to
    ``createSlope`` together with ``alphabet`` and its result is returned.

    NOTE(review): the counts are keyed by upper-cased characters, while
    ``createSlope`` looks up the entries of ``alphabet`` verbatim, so
    lower-case alphabet entries presumably never match a count - confirm
    against the contents of alphabets.json.
    """
    counts = {}
    for symbol in str:
        # NOTE(review): per-character echo looks like leftover debugging.
        print(symbol)
        if symbol in alphabet:
            bucket = symbol.upper() if symbol.islower() else symbol
            counts[bucket] = counts.get(bucket, 0) + 1
    return createSlope(counts, alphabet)
|
|
|
|
def reduceNoise(listOfCharts):
    """Remove, in place, the columns whose value is 0 in every chart.

    Args:
        listOfCharts: list of charts, each chart being a list of
            ``[character, value]`` pairs. Column ``i`` of every chart is
            compared; the number of columns examined is the length of the
            first chart.

    Returns:
        None. Each element of ``listOfCharts`` is replaced by a new list that
        omits the all-zero columns. The ``[character, value]`` pairs
        themselves are left untouched.

    Raises:
        IndexError: if a chart has fewer entries than the first chart.
    """
    # Nothing to compare when no charts were supplied (e.g. no languages
    # given on the command line); indexing [0] would raise IndexError.
    if not listOfCharts:
        return

    # Columns in which no chart carries a non-zero value.
    dead_columns = {
        col
        for col in range(len(listOfCharts[0]))
        if not any(chart[col][1] != 0 for chart in listOfCharts)
    }

    # Filter by column index rather than by writing a -1 marker into the
    # rows, so genuine values are never confused with the marker and the
    # caller's pairs are not mutated.
    for idx, chart in enumerate(listOfCharts):
        listOfCharts[idx] = [
            pair for col, pair in enumerate(chart) if col not in dead_columns
        ]
|
|
def neighborDistance(a, b):
    """Print and return the summed absolute difference between two charts.

    Args:
        a: sequence of ``[character, value]`` pairs.
        b: sequence of ``[character, value]`` pairs, compared position by
            position with ``a``.

    Returns:
        The sum of ``abs(a_i[1] - b_i[1])`` over the paired entries. Pairing
        stops at the shorter of the two inputs (``zip`` semantics). The same
        number is also printed to stdout, as before; previously the function
        returned ``None``, so the result could not be used by callers.
    """
    distance = sum(abs(left[1] - right[1]) for left, right in zip(a, b))
    print(distance)
    return distance
|
|
|
|
# Load the character-count tables; `result` is indexed below by the language
# names given on the command line. The files are opened with an explicit
# UTF-8 encoding and closed as soon as they have been parsed.
with open('../data/mostCommonCharacters.json', encoding='utf-8') as file:
    result = json.load(file)
# NOTE(review): unconditional dump of the "sm" entry looks like leftover
# debugging; it raises KeyError if the data file has no "sm" key.
print(result["sm"])

with open('../data/alphabets.json', encoding='utf-8') as file:
    alphabet_row = json.load(file)
# Flat list of every character listed in alphabets.json.
alphabet = collectAlphabet(alphabet_row)

# Position of each character within `alphabet` (last occurrence wins).
alphabeticalNumbers = {char: i for i, char in enumerate(alphabet)}

array = np.empty(0, dtype=float)
characters = []
args = len(sys.argv)
# Every command-line argument after the script name is a key into `result`.
langs = sys.argv[1:]

# One chart (list of [character, percentage] pairs) per requested language.
charts = []
for lang in langs:
    slope = createSlope(result[lang], alphabet)
    chart = createChart(slope, alphabet)
    charts.append(chart)
|
|
|
|
def breakapart(a):
    """Split a sequence of pairs into two parallel lists.

    Returns a tuple ``(firsts, seconds)`` where ``firsts`` holds element 0
    and ``seconds`` holds element 1 of every item in ``a``. ``a`` is iterated
    twice, once per output list.
    """
    firsts = [pair[0] for pair in a]
    seconds = [pair[1] for pair in a]
    return firsts, seconds
|
|
# `denoised` is a second name for the same list object as `charts`, not a
# copy; reduceNoise rewrites that list in place.
denoised = charts
reduceNoise(charts)

# Draw one line per chart: characters on the x axis, values on the y axis,
# then display the combined figure.
for c in charts:
    labels, values = breakapart(c)
    plt.plot(labels, values)
plt.show()
|