Electron-Svelte-Recipe-Planner/NimRecipe/oldcode/NimRecipe.nim
2021-12-16 00:55:59 -05:00

333 lines
11 KiB
Nim

import httpclient
import strtabs # To access XmlAttributes
import os # To use splitFile
import strutils # To use cmpIgnoreCase
import tables
import json
import htmlparser
import std/xmltree
import std/jsonutils
import asyncdispatch
import threadpool
import random
randomize()
#Normally i like do this order:
#template
#iterators
#functions
#classes
#vars
#but i want these allocation vars to be easy to find because of the editing that will need to be constantly done
#THIS IS PROBABLY INVALID CODE. IT WORKS FOR A WHILE, BUT THAT DOESN'T CHANGE THE FACT THAT NIM DOESN'T REALLY WORK WELL WITHOUT A GC
#THIS WAS AN ATTEMPT AT MULTITHREADING, AND ALL MY OTHER ATTEMPTS HAVE FAILED
#THIS HAS SOME COOL IDEAS THAT I WISH TO ARCHIVE IN CASE I NEED TO COME BACK TO IT
# Registry of manually allocated pointers, grouped by a float block id.
# `createShared` takes an element COUNT (default 1), not a byte size, so a
# single slot is requested here. The slot is zero-initialised, which leaves
# the TableRef nil, so it must be given a real table before any lookup.
var pointerBlocks* = createShared(TableRef[float, seq[pointer]])
pointerBlocks[] = newTable[float, seq[pointer]]()
template organizedPtr(x: untyped, a: untyped, c: float) =
  ## Value overload: evaluates `a` once, stores it in a manually allocated
  ## slot registered under block id `c`, and declares `var x` in the caller's
  ## scope holding a copy of that value.
  ##
  ## `create` takes an element count (default 1), so exactly one slot is
  ## allocated. The value is written into the slot before it is copied out;
  ## without that write `x` would always be the zero value of the type.
  var slot = create(typeof(a))
  slot[] = a
  # mgetOrPut registers the block on first use instead of raising KeyError.
  pointerBlocks[].mgetOrPut(c, newSeq[pointer]()).add(slot)
  var x = slot[]
template organizedPtr(x: untyped, a: typedesc, c: float) =
  ## Type overload: declares `var x` in the caller's scope as a pointer to one
  ## zero-initialised `a`, and registers that pointer under block id `c`.
  ##
  ## `create` takes an element count (default 1), so exactly one slot is
  ## allocated rather than `sizeof(a)` of them.
  var x = create(a)
  # mgetOrPut registers the block on first use instead of raising KeyError.
  pointerBlocks[].mgetOrPut(c, newSeq[pointer]()).add(x)
#you can probably do this automatically with macros when a proc executes, but for now it just exists on its own
#this creates an id block, so you can free all pointers with a single command
#create an ID to look the block up, and then free it, etc
proc cdbs(): float =
  ## Creates a new, empty pointer block in `pointerBlocks` and returns its id.
  ##
  ## The id is a random float in 0.0 .. 100000000.0. The return type is
  ## `float` because that is both what is generated here and what the
  ## consumers of the id (`organizedPtr`, `fcdbs`) take; Nim has no implicit
  ## float -> int conversion, so an `int` return type cannot hold it.
  result = rand(100000000.0)
  # Register the block up front. mgetOrPut leaves an existing block untouched
  # in the unlikely event that the same id is drawn twice.
  discard pointerBlocks[].mgetOrPut(result, newSeq[pointer]())
template `!`(a: ptr) =
  ## Prefix shorthand that releases the memory behind `a` with `dealloc`.
  dealloc(a)
proc fcdbs(x: float) =
  ## Frees every pointer registered under block id `x` and removes the block
  ## from `pointerBlocks`. An unknown id is a no-op.
  if not pointerBlocks[].hasKey(x):
    return
  for heldPtr in pointerBlocks[][x]:
    # Free the stored pointer itself. Taking the address of the loop variable
    # would hand `dealloc` memory that was never allocated with `create`.
    # NOTE(review): only pointers obtained from `create`/`alloc` may be in a
    # block; anything from `createShared` needs `deallocShared` instead.
    dealloc(heldPtr)
  # Drop the entry so the freed pointers cannot be released a second time.
  pointerBlocks[].del(x)
#so this idea is a bit weird
#I'm going to make memory blocks of pointers, using random numbers as keys to avoid race conditions, except in very rare situations
type
  fancyamount* = ref object of RootObj
    ## Quantity of one ingredient line.
    amountnum*: string ## amount as display text, e.g. "1/2"
    measure*: string   ## unit of measure, or "none"

  Recipeline* = ref object of RootObj
    ## One ingredient line of a recipe.
    fancy*: fancyamount  ## how much of the ingredient
    ingredients*: string ## ingredient text

  Recipe* = ref object
    ## A scraped recipe.
    name*: string
    author*: string
    url*: string
    lines*: seq[Recipeline]
    img*: string
#this lists the measurements which can be detected. I can really only use imperial because using metric opens a whole other can of worms and isn't used in NYTC
#these vars are allocated on the shared heap so that they can be accessed from threads.
# Shared lookup data. `createShared` takes an element COUNT (default 1), not
# a byte size, so each global requests exactly one slot.

# Unit words searched for in ingredient text; the first match wins.
var measures* = createShared(seq[string])
measures[] = @["handful", "cup", "cups", "pound", "pounds", "ounce", "ounces", "tablespoons", "tablespoon", "teaspoon", "teaspoons"]

# Maps single-character vulgar fractions to their "n/d" spelling.
var htmlconversion* = createShared(TableRef[string, string])
htmlconversion[] = {
  "⅛": "1/8", "¼": "1/4",
  "½": "1/2", "¾": "3/4",
  "⅓": "1/3", "⅔": "2/3",
  "⅕": "1/5", "⅖": "2/5",
  "⅗": "3/5",
  "⅘": "4/5", "⅙": "1/6",
  "⅚": "5/6", "⅜": "3/8",
  "⅝": "5/8", "⅞": "7/8"
}.newTable()

# Crawl frontier: seeded with the seasonal tag pages and appended to while
# the crawl discovers new links.
var urlMaster* = createShared(seq[string])
urlMaster[] = @["tag/spring", "tag/summer", "tag/winter", "tag/fall"]
# Shared list of every recipe scraped so far. `createShared` takes an element
# COUNT (default 1), so one zero-initialised (empty) seq slot is requested.
var recipeMaster = createShared(seq[Recipe])
# Debug output: prints the size of the `urlMaster` pointer itself, not the
# number of URLs it holds.
echo urlMaster.sizeOf()
proc bootlegGC() {.thread.} =
  ## Background loop that never returns: every 100 ms it prints the `sizeof`
  ## of the seq behind `urlMaster`. Despite the name it frees nothing.
  while true:
    echo sizeof(urlMaster[])
    sleep(100)
iterator `...`*[T](a: T, b: T): T =
  ## Inclusive ascending range: yields `a`, `a + 1`, ... up to and including
  ## `b`. Yields nothing when `a` is greater than `b`.
  var cursor: T = T(a)
  while true:
    if b < cursor:
      break
    yield cursor
    inc(cursor)
proc initFancyAmount(a: string, b: string): fancyamount =
  ## Builds a `fancyamount` whose amount text is `a` and whose measure is `b`.
  new(result)
  result.amountnum = a
  result.measure = b
iterator rec(a: ptr seq[string], idleLimit = 30000, pollMs = 10): ptr string =
  ## Walks a list that may keep growing while it is being iterated.
  ##
  ## Every element of `a[]` is yielded once, in index order, as a freshly
  ## allocated `ptr string` holding a copy of the element. When the end of the
  ## list is reached the iterator polls for new elements and finishes after
  ## more than `idleLimit` consecutive polls without growth.
  ##
  ## * `a` - the list to follow.
  ## * `idleLimit` - idle polls tolerated before giving up (default 30000).
  ## * `pollMs` - milliseconds slept per poll step (default 10).
  ##
  ## Ownership: each yielded pointer comes from `createShared` and is NOT
  ## freed here, because the loop body may pass it to a spawned thread that
  ## outlives the iteration step. The consumer releases it with
  ## `deallocShared` once it is finished with it.
  ## NOTE(review): `a[]` is read without any lock; confirm how writers on
  ## other threads are synchronised.
  var
    nextIndex = 0
    idlePolls = 0
  while true:
    sleep(pollMs)
    # `<` rather than `!= len - 1`: the last element must be yielded as well,
    # and an empty list must wait instead of being indexed.
    if nextIndex < a[].len:
      idlePolls = 0
      let holder = createShared(string)
      holder[] = a[][nextIndex]
      inc nextIndex
      yield holder
    else:
      if idlePolls >= idleLimit:
        break
      sleep(pollMs)
      inc idlePolls
proc newRecipe(name: string, author: string, url: string, lines: seq[RecipeLine], img: string): Recipe =
  ## Allocates a `Recipe` and copies each argument into the field of the same
  ## name.
  new(result)
  result.name = name
  result.author = author
  result.url = url
  result.lines = lines
  result.img = img
proc findsubstring(input : seq[string], substring : string) : seq[int] =
  ## Returns the indices of every line in `input` that contains `substring`,
  ## in ascending order.
  ##
  ## When no line matches (including when `input` is empty) the result is
  ## `@[-1]`, so callers must treat -1 as "not found" rather than as an index.
  for index, line in input:
    if line.contains(substring):
      result.add(index)
  if result.len == 0:
    result.add(-1)
#I remember there being a module to do this, but I can't find it, so we're stuck with the manual version
proc printrecipes(url : ptr string, ssplit: ptr seq[string]) : Recipe =
  ## Builds a `Recipe` from one downloaded recipe page.
  ##
  ## * `url` - pointer to the page's URL; copied into the result.
  ## * `ssplit` - pointer to the page source split into lines.
  ##
  ## Returns nil (after printing a message) when no byline can be found.
  ##
  ## All working data lives in ordinary locals. The manual pointer blocks
  ## were released *before* the final values were read back out of them,
  ## which is a use-after-free.
  ##
  ## NOTE(review): the byline is stored in the recipe's `name` field and
  ## `author` is always left empty. That mirrors the existing behaviour;
  ## confirm which field the byline is meant to fill.

  proc detectMeasure(input: string): string =
    ## First entry of `measures` found in the normalized `input`, else "none".
    let lowered = normalize(input)
    for unit in measures[]:
      if lowered.contains(unit):
        return unit
    return "none"

  proc readAmount(quantity: string): tuple[known: bool, text: string] =
    ## Turns a stripped, non-empty quantity cell into display text.
    ## `known` is false only for an `&`-entity that has no `htmlconversion`
    ## entry; such lines are skipped by the caller.
    if quantity.contains("frac"):
      # e.g. "&frac12;" or "1 &frac12;": the two characters after "frac" are
      # the numerator and denominator digits.
      let digits = quantity.split("frac")[1]
      if digits.len < 2:
        # Malformed entity: keep the raw text rather than abort the recipe.
        return (true, quantity)
      let fraction = digits[0] & "/" & digits[1]
      if quantity[0] == '&':
        return (true, fraction)
      # Mixed number: a whole part precedes the entity.
      return (true, quantity.split(" ")[0] & " " & fraction)
    if quantity[0] == '&':
      if htmlconversion[].hasKey(quantity):
        return (true, htmlconversion[][quantity])
      return (false, "")
    return (true, quantity)

  let
    html = parseHtml(ssplit[].join(""))
    quanity = findsubstring(ssplit[], """<span class="quantity">""")
    ingredient = findsubstring(ssplit[], """<span class="ingredient-name">""")
  var
    img, author: string
    name = "placeholder"
    outputrecipe: seq[Recipeline]

  # Image: the last <img> with a src inside any <picture> wins.
  for picture in html.findAll("picture"):
    for image in picture.findAll("img"):
      # `attr` returns "" for a missing attribute instead of raising.
      let src = image.attr("src")
      if src.len > 0:
        img = src

  # Byline: htmlparser does not expose classes, so search the serialized
  # node. Only the first matching paragraph is used.
  # NOTE(review): the paragraph is expected to have a single child; with
  # several children the last one wins.
  for node in html.findAll("p"):
    if ($node).contains("card-byline"):
      for item in node.items:
        name = $item
      break

  if name == "placeholder":
    # Fallback: the byline is also embedded in the page's bootstrap JSON.
    for line in ssplit[]:
      if line.contains("bootstrap.recipe"):
        name = parseJson(line.split("= ")[1].split(";")[0])["byline"].getStr
        break

  if name == "placeholder":
    echo "Yea we tried everything but we cant find the author name"
    return nil

  # Ingredient lines: the text sits on the line after each matched <span>.
  for i in 0 ..< min(quanity.len, ingredient.len):
    let
      quantityRow = quanity[i] + 1
      ingredientRow = ingredient[i] + 1
    # findsubstring reports "no match" as -1, and a match on the final line
    # has no following line; neither may be used as an index.
    if quanity[i] < 0 or ingredient[i] < 0 or
        quantityRow >= ssplit[].len or ingredientRow >= ssplit[].len:
      continue
    let
      quanitystring = ssplit[][quantityRow].strip()
      ingredients = ssplit[][ingredientRow].strip()
    if quanitystring.len == 0:
      outputrecipe.add(Recipeline(
        fancy: initFancyAmount("Unspecified Amount of", "none"),
        ingredients: ingredients))
      continue
    let amount = readAmount(quanitystring)
    if amount.known:
      outputrecipe.add(Recipeline(
        fancy: initFancyAmount(amount.text, detectMeasure(ingredients)),
        ingredients: ingredients))

  return newRecipe(name, author, url[], outputrecipe, img)
proc geturl(input : string) : seq[string] =
  ## Extracts the crawlable links from a page's HTML.
  ##
  ## Returns, in document order per group:
  ## 1. every `<a href>` that starts with "/tag/", then
  ## 2. every `<article data-url>` that starts with "/recipes/" and has at
  ##    least one character after that prefix.
  ##
  ## Elements without the attribute (or with no attributes at all) are
  ## skipped; `attr` returns "" in that case, so no exception handling or
  ## nil check on `attrs` is needed.
  const
    tagPrefix = "/tag/"
    recipePrefix = "/recipes/"
  # The parsed tree is an ordinary managed ref; no manual allocation needed.
  let page = parseHtml(input)
  # Tag listing pages.
  for node in page.findAll("a"):
    let href = node.attr("href")
    if href.startsWith(tagPrefix):
      result.add(href)
  # Recipe pages.
  for node in page.findAll("article"):
    let dataUrl = node.attr("data-url")
    if dataUrl.len > recipePrefix.len and dataUrl.startsWith(recipePrefix):
      result.add(dataUrl)
proc recursive(uwu : ptr) {.thread.} =
  ## Worker for one crawl target. `uwu` points at a site-relative path.
  ##
  ## Downloads the page; if the path contains "recipes/" the page is parsed
  ## into a `Recipe` and appended to `recipeMaster`. Every link found by
  ## `geturl` that is not yet in `urlMaster` is then appended to it. Paths
  ## containing "%" are ignored. Any exception is printed and swallowed so
  ## one bad page cannot take the crawl down.
  ##
  ## NOTE(review): `urlMaster` and `recipeMaster` are mutated here without a
  ## lock while other workers do the same; confirm how this is synchronised.
  let client = newHttpClient()
  # Close the client on every exit path, including early return and errors.
  defer: client.close()
  try:
    if uwu[].contains("%"):
      return
    # Plain locals replace the create/dealloc pairs, which leaked whenever an
    # exception skipped the frees.
    var webread = client.getContent("https://cooking.nytimes.com/" & uwu[]).split("\n")
    if uwu[].contains("recipes/"):
      let recipe = printrecipes(uwu, addr webread)
      # printrecipes returns nil when it cannot find a byline.
      if recipe != nil:
        recipeMaster[].add(recipe)
    for urlx in geturl(webread.join("")):
      if not urlMaster[].contains(urlx):
        urlMaster[].add(urlx)
  except Exception as e:
    # Deliberately broad: the page parsing relies on unchecked indexing, and
    # a failure on one page must not abort the whole crawl.
    echo e.msg
#the channels to communicate stuff
proc scanwebsite() =
  ## Drives the crawl: starts the `bootlegGC` background task, then spawns
  ## one `recursive` worker for every URL produced by `rec(urlMaster)`.
  ##
  ## `rec` keeps polling `urlMaster` while workers append to it, so the loop
  ## follows the list as it grows and ends once `rec` stops yielding.
  ##
  ## NOTE(review): the proc does not wait for spawned workers before it
  ## returns. `sync()` cannot simply be added, because `bootlegGC` never
  ## terminates and would block it forever.
  spawn bootlegGC()
  echo "yea this should be going"
  try:
    for url in rec(urlMaster):
      spawn recursive(url)
    # echo GC_getStatistics()
    echo "writing now"
  except CatchableError as e:
    # Report what went wrong instead of swallowing it silently.
    echo "uwu: ", e.msg
proc writeToFile() =
  ## Dumps the crawl results to the working directory: the URL list goes to
  ## "beta.txt", one entry per line, and the JSON form of `recipeMaster`
  ## goes to "recipes.json".
  let urlDump = urlMaster[].join("\n")
  writeFile("beta.txt", urlDump)
  let recipeDump = $toJson(recipeMaster)
  writeFile("recipes.json", recipeDump)
proc main() =
  ## Program entry point: crawl the site, then write the results to disk.
  scanwebsite()
  writeToFile()

# `when` resolves at compile time, so the call is only compiled in when this
# file is the main module.
when isMainModule:
  main()