Electron-Svelte-Recipe-Planner/package/NimRecipe/nimServerStuff.py
2021-12-16 00:55:59 -05:00

74 lines
2.9 KiB
Python

import zipfile
import tarfile
import shutil
import urllib.request
import spacy
import textblob
import re
import os
import warnings
# Silence UserWarnings whose message starts with "[W008]" (a spaCy warning code).
warnings.filterwarnings("ignore", message=r"\[W008\]", category=UserWarning)
# Matches every character that is NOT an ASCII letter, a space or a hyphen;
# listOfNouns uses it to strip digits and punctuation from each word.
regex = re.compile('[^a-zA-Z -]')
# Load the medium English model; the large one is not needed here.
nlp = spacy.load("en_core_web_md")
# NOTE(review): this is set after spacy has been imported and the model loaded.
# If spaCy only reads this variable at import time it has no effect here --
# confirm, and move it above `import spacy` if it is still needed.
os.environ["SPACY_WARNING_IGNORE"] = "W008"
# Measurement units and preparation words that have to be filtered out of
# ingredient text, because they really throw off the db.
measure = ["cup", "cups", "pound", "pounds", "ounce", "ounces",
"tablespoons", "tablespoon", "teaspoon", "teaspoons", "thinly", "halved", "halve", "slice", "coarsely",
"thawed", "finely"]
# Parsed reference words, filled lazily so that each one goes through the
# spaCy pipeline only once instead of once per input word.
_REFERENCE_DOCS = {}


def _reference_doc(word):
    """Return the parsed spaCy doc for *word*, parsing it on first use only."""
    doc = _REFERENCE_DOCS.get(word)
    if doc is None:
        doc = _REFERENCE_DOCS[word] = nlp(word)
    return doc


def listOfNouns(input: str, option=False) -> list:
    """Return the food-related words found in an ingredient string.

    The text is split on single spaces and every word is stripped of all
    characters other than ASCII letters, spaces and hyphens. A word is kept
    when its vector similarity to a set of cooking-related reference words
    passes the thresholds below, unless it looks like a measurement (and is
    not strongly a seafood, fruit or herb word). Despite the name, no
    part-of-speech filtering is applied.

    Args:
        input: Free-form ingredient text. (The name shadows the builtin but
            is kept because callers may pass it by keyword.)
        option: When truthy, print debugging information to stdout.

    Returns:
        The kept words in order of first appearance, without duplicates.
    """
    returnlist = []
    for x in input.split(" "):
        # Remove all non-alphabet characters (spaces and hyphens survive).
        search = regex.sub("", x)
        # Nothing left (empty split piece, or a token such as "2" or "1/2"):
        # an empty doc has similarity 0.0 to everything and could never pass
        # a threshold, so skip it before paying for any model call.
        if not search:
            continue
        doc = nlp(search)
        # Similarity is a score of how close two words are in usage. These
        # reference words were chosen to filter out plain English words such
        # as "ground" or "whole", which may be nouns but have no strong
        # association with cooking, flavor or seasoning.
        cooking = doc.similarity(_reference_doc("cooking"))
        flavor = doc.similarity(_reference_doc("flavor"))
        food = doc.similarity(_reference_doc("seasoning"))
        # NOTE(review): "vegitable" is misspelled. If the model has no vector
        # for it, this score is always 0.0 and the criterion never fires.
        # Left as is because correcting it would change which words are kept.
        vegitable = doc.similarity(_reference_doc("vegitable"))
        fruit = doc.similarity(_reference_doc("fruit"))
        fish = doc.similarity(_reference_doc("seafood"))
        herb = doc.similarity(_reference_doc("herb"))
        # Debug option.
        if option:
            print(search, cooking)
            print(search, flavor)
            print(search, food)
        # Magic numbers: the reference words get more generic as they go on,
        # so the bar gets higher as well.
        if not (cooking > 0.34 or flavor > 0.4 or food > 0.6
                or vegitable > 0.64 or fruit > 0.5 or herb > 0.3):
            continue
        # Words that are strongly seafood, fruit or herb are trusted and
        # bypass the measurement filter.
        if not (fish > 0.55 or fruit > 0.5 or herb > 0.5):
            # This (hopefully) sorts out measurements closely associated with
            # food, and other various impurities.
            milliliters = doc.similarity(_reference_doc("milliliters"))
            unit = doc.similarity(_reference_doc("unit"))
            if (milliliters > 0.44 and unit > 0.44) or search in measure:
                if option:
                    print(search)
                    print(milliliters)
                    print(unit)
                    print(doc[0].pos_)
                continue
        if search not in returnlist:
            returnlist.append(search)
    if option:
        print(returnlist)
    return returnlist
def httpRequest(input: str):
    """Fetch a URL and return the response body as the text of its bytes literal.

    The body is NOT decoded: the returned string is the ``repr`` of the raw
    bytes (for example ``"b'hello'"``), which is what callers of the original
    implementation received.

    Args:
        input: URL to open; anything ``urllib.request.urlopen`` accepts.

    Returns:
        ``repr`` of the bytes read from the response.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on failure.
    """
    # Use the response as a context manager so it is closed deterministically
    # instead of lingering until garbage collection.
    with urllib.request.urlopen(input) as response:
        body = response.read()
    # repr() produces exactly what f'{body}' did, without relying on the
    # implicit str() conversion of bytes.
    return repr(body)