74 lines
2.9 KiB
Python
74 lines
2.9 KiB
Python
import zipfile
|
|
import tarfile
|
|
import shutil
|
|
import urllib.request
|
|
import spacy
|
|
import textblob
|
|
import re
|
|
import os
|
|
import warnings
|
|
|
|
# Silence UserWarnings whose message starts with the "[W008]" code.
warnings.filterwarnings(
    action="ignore",
    message=r"\[W008\]",
    category=UserWarning,
)

# Matches every character that is NOT an ASCII letter, a space or a hyphen.
regex = re.compile('[^a-zA-Z -]')

# The medium English model is enough here; the larger models are not needed.
nlp = spacy.load("en_core_web_md")

# Also ask for W008 to be ignored through the environment.
# NOTE(review): this is assigned after spaCy has been imported and the model
# loaded -- confirm the installed spaCy version still reads it at this point.
os.environ["SPACY_WARNING_IGNORE"] = "W008"
|
|
|
|
|
|
# Words that listOfNouns always rejects: units of measure plus a few
# preparation words. They are filtered out because they really throw off
# the db.
measure = [
    "cup",
    "cups",
    "pound",
    "pounds",
    "ounce",
    "ounces",
    "tablespoons",
    "tablespoon",
    "teaspoon",
    "teaspoons",
    "thinly",
    "halved",
    "halve",
    "slice",
    "coarsely",
    "thawed",
    "finely",
]
|
|
|
|
def listOfNouns(input: str, option=False) -> list:
    """Return the distinct food-related words found in ``input``.

    ``input`` is split on single spaces. Every character other than ASCII
    letters, spaces and hyphens is stripped from each token (using the
    module-level ``regex``), and each remaining word is scored with spaCy
    vector similarity against a few food-related reference words. A word
    that clears the thresholds is kept unless it looks like a measurement
    or appears in the module-level ``measure`` list.

    Args:
        input: Text to scan.
        option: When truthy, print the intermediate scores and the result.

    Returns:
        The accepted words, in order of first appearance, without duplicates.
    """
    # Clean every non-empty token, then drop the ones that have nothing left
    # (e.g. "1/2"); an empty string has no vector to compare against.
    cleaned = (regex.sub("", raw) for raw in input.split(" ") if raw)
    words = [word for word in cleaned if word]

    # The reference words are identical for every token, so parse them once
    # per call instead of once per token. Skipped when there is nothing to
    # score.
    reference = {}
    if words:
        reference = {
            word: nlp(word)
            for word in (
                "cooking", "flavor", "seasoning", "vegetable", "fruit",
                "seafood", "herb", "milliliters", "unit",
            )
        }

    returnlist = []
    seen = set()
    for search in words:
        doc = nlp(search)

        # Similarity is a score of how close two words are in usage. The
        # reference words were chosen to filter out plain English words such
        # as "ground" or "whole": they may be nouns, but they have no strong
        # association with cooking, flavor or seasoning.
        cooking = doc.similarity(reference["cooking"])
        flavor = doc.similarity(reference["flavor"])
        food = doc.similarity(reference["seasoning"])
        # The reference word used to be spelled "vegitable".
        vegetable = doc.similarity(reference["vegetable"])
        fruit = doc.similarity(reference["fruit"])
        fish = doc.similarity(reference["seafood"])
        herb = doc.similarity(reference["herb"])

        if option:
            print(search, cooking)
            print(search, flavor)
            print(search, food)

        # Magic numbers: the more generic the reference word, the higher
        # the bar.
        food_related = (
            cooking > 0.34
            or flavor > 0.4
            or food > 0.6
            or vegetable > 0.64
            or fruit > 0.5
            or herb > 0.3
        )
        if not food_related:
            continue

        # Words strongly tied to seafood, fruit or herbs skip the
        # measurement filter; everything else must pass it.
        strongly_food = fish > 0.55 or fruit > 0.5 or herb > 0.5
        if not strongly_food:
            milliliters = doc.similarity(reference["milliliters"])
            unit = doc.similarity(reference["unit"])
            # This (hopefully) sorts out measurements closely associated
            # with food, and other various impurities.
            if (milliliters > 0.44 and unit > 0.44) or search in measure:
                if option:
                    print(search)
                    print(milliliters)
                    print(unit)
                    print(doc[0].pos_)
                continue

        if search in seen:
            continue
        seen.add(search)
        returnlist.append(search)

    if option:
        print(returnlist)
    return returnlist
|
|
|
|
def httpRequest(input: str):
    """Open the URL ``input`` and return its body as a string.

    The body is read as ``bytes`` and converted with ``repr``, so the result
    has the form ``"b'...'"`` with non-ASCII bytes escaped. This is exactly
    what the previous f-string formatting of the bytes produced.

    NOTE(review): callers most likely want decoded text instead; confirm
    against the call sites before changing the return format.

    Args:
        input: URL to open with ``urllib.request.urlopen``.

    Returns:
        The ``repr`` of the response body bytes.
    """
    # Use the response as a context manager so it is closed after reading;
    # previously it was never closed explicitly.
    with urllib.request.urlopen(input) as response:
        body = response.read()
    return repr(body)
|