Electron-Svelte-Recipe-Planner/NimRecipe/NimRecipe.nim

import httpclient
import strtabs  # To access XmlAttributes
import os       # To use splitFile
import strutils # To use cmpIgnoreCase
import tables
import json
import htmlparser
import std/xmltree
import std/jsonutils
import threadpool
# Normally I like to use this order:
    # templates
    # iterators
    # functions
    # classes
    # vars
    # but I want these allocation vars to be easy to find because of the editing that will constantly need to be done


# THIS CODE MAY EVENTUALLY FIZZLE OUT, STALL AND THEN DIE. THIS IS BECAUSE OF A BUG IN HTTPCLIENT.CLOSE() THAT SHOWS UP AFTER MANY CLOSES.
# There aren't many workarounds. Not closing the clients leads to kernel issues caused by having too many open files.
# I tried closing them after 10 urls, and keeping a list of HTTP clients to be called upon, but it doesn't work as nicely as you'd think.
# Using async to close them isn't really effective either and leads to other problems.


type
  # Amount of one ingredient: the amount text (for example "1/2") and the
  # unit of measure detected for it ("none" when no unit was recognised).
  fancyamount* = ref object of RootObj
    amountnum*, measure*: string

  # One ingredient line of a recipe: its amount plus the ingredient text.
  Recipeline* = ref object of RootObj
    fancy*: fancyamount
    ingredients*: string

  # A scraped recipe. Field order is kept as declared because it is also the
  # order in which the fields are serialised.
  Recipe* = ref object
    name*, author*, url*: string
    lines*: seq[Recipeline]
    img*: string


# This lists the measurements that can be detected. I can really only use imperial because using metric opens a whole other can of worms and isn't used in NYTC.
# Allocating these vars on the shared heap allows them to be used from threads.

# ! Normally this doesn't work because each thread has its own heap, but because these are read-only it seems to be fine?!
# Units of measure that can be recognised in ingredient text. The slot lives
# on the shared heap so the worker threads can read it through the pointer.
# createShared's second argument is an element COUNT (default 1), not a byte
# size, so a single array is all that has to be requested.
var measures* = createShared(array[11, string])

measures[] = ["handful", "cup", "cups", "pound", "pounds", "ounce", "ounces", "tablespoons", "tablespoon", "teaspoon", "teaspoons"]

# HTML numeric entities for vulgar fractions, mapped to a readable "a/b"
# form. There may be a stdlib module that does this, but it was not found,
# so the table is maintained by hand.
# One shared slot holding the TableRef is enough (see the count note above).
# NOTE(review): the table itself is allocated by newTable while only this
# manually allocated slot refers to it — confirm it is kept alive under the
# memory manager the project is built with.
var htmlconversion* = createShared(TableRef[string, string])

htmlconversion[] = {"&#8539;": "1/8", "&#188;" : "1/4",
                    "&#189;" : "1/2", "&#190;" : "3/4",
                    "&#8531;" : "1/3", "&#8532;" : "2/3",
                    "&#8533;" : "1/5", "&#8534;" : "2/5",
                    "&#8535;" : "3/5",
                    "&#8536;" : "4/5", "&#8537;" : "1/6",
                    "&#8538;" : "5/6", "&#8540;" : "3/8",
                    "&#8541;" : "5/8", "&#8542;" : "7/8"
                    }.newTable()

# Queues shared with the worker threads: finished recipes go into
# recipeMaster, URLs that still have to be fetched go into urlChannel.
# Channels are not considered stable together with threadpool, though it
# appears to be fine for this use.
var recipeMaster = createShared(Channel[Recipe], 1)
open(recipeMaster[])
var urlChannel = createShared(Channel[string], 1)
open(urlChannel[])
# Seed the URL queue with the seasonal tag pages.
for seedPath in ["tag/spring", "tag/summer", "tag/winter", "tag/fall"]:
    send(urlChannel[], seedPath)

iterator `...`*[T](a: T, b: T): T =
  ## Yields every value from `a` up to and including `b`, in ascending order.
  ## Yields nothing when `a > b`.
  ##
  ## Counting is delegated to `countup`, so the loop also ends when `b` is the
  ## highest value of a small ordinal type (for example `253'u8 ... 255'u8`),
  ## where stepping past `b` by hand would wrap around and never stop.
  for value in countup(a, b):
    yield value

proc initFancyAmount(a : string, b :string ) : fancyamount =
    ## Creates a `fancyamount` whose amount text is `a` and whose measure is `b`.
    result = fancyamount(amountnum: a, measure: b)


iterator rec(a: ptr Channel[string]) :  string =
    ## Yields messages from the channel `a` as they become available.
    ##
    ## When no message could be taken, the iterator sleeps for
    ## `pollIntervalMs` milliseconds and counts one idle poll. The idle count
    ## is cumulative (it is never reset by a successful receive); once it
    ## exceeds `maxIdlePolls` the iterator ends.
    const
        maxIdlePolls = 30000
        pollIntervalMs = 10
    var idlePolls = 0
    while idlePolls <= maxIdlePolls:
        # tryRecv reports through dataAvailable whether a message was really
        # taken. Only then is msg meaningful; a failed attempt is treated as
        # an idle poll instead of being yielded as an empty string.
        let received = a[].tryRecv()
        if received.dataAvailable:
            yield received.msg
        else:
            sleep pollIntervalMs
            inc idlePolls


proc newRecipe(name : string, author : string, url : string, lines : seq[RecipeLine], img : string) : Recipe =
    ## Allocates a `Recipe` and copies every argument into the field of the
    ## same name.
    new(result)
    result.name = name
    result.author = author
    result.url = url
    result.lines = lines
    result.img = img

proc findsubstring(input : seq[string], substring : string) : seq[int] =
    ## Returns the zero-based indices of the entries of `input` that contain
    ## `substring`, in ascending order.
    ##
    ## When nothing matches (including when `input` is empty) the result is
    ## `@[-1]`, so callers must treat `-1` as "no match" rather than as an
    ## index.
    for index, line in input:
        if line.contains(substring):
            result.add(index)
    if result.len == 0:
        result.add(-1)

proc printrecipes(url : string, ssplit: seq[string], client : HttpClient) : Recipe =
    ## Builds a `Recipe` from one downloaded recipe page.
    ##
    ## * `url` is stored on the result unchanged.
    ## * `ssplit` is the page source split into lines. Quantity and ingredient
    ##   text is read from the line that follows each matching `<span>` line.
    ## * `client` is not used; it is kept so existing callers still compile.
    ##
    ## Returns `nil` when no byline can be found. Errors raised by `parseJson`
    ## or by splitting the `bootstrap.recipe` line are not caught here.
    # NOTE(review): the byline ends up in `name` while `author` is never
    # filled in — confirm whether that mapping is intended.

    proc detectMeasure(input: string) : string =
        # First entry of `measures` contained in the normalized text, else
        # "none". Entries are tried in list order.
        let normalized = normalize(input)
        for x in measures[]:
            if normalized.contains(x):
                return x
        return "none"

    let html = parseHtml(ssplit.join(""))
    let quantityLines = findsubstring(ssplit, """<span class="quantity">""")
    let ingredientLines = findsubstring(ssplit, """<span class="ingredient-name">""")
    let author = ""
    var img : string
    var outputrecipe : seq[Recipeline]
    var name = "placeholder"

    # The last <img> inside a <picture> that carries a non-empty src wins.
    # attr() returns "" for a missing attribute, so an image without src is
    # skipped instead of raising.
    for picture in html.findAll("picture"):
        for image in picture.findAll("img"):
            let src = image.attr("src")
            if src.len > 0:
                img = src

    for node in html.findAll("p"):
        # htmlparser gives no class lookup, so the rendered node is searched.
        if ($node).contains("card-byline"):
            # Keeps the last child of the first matching paragraph.
            for items in node.items:
                name = $items
            break

    if name == "placeholder":
        # Fallback: the byline is also embedded in the page's script as JSON.
        # Every line is inspected until the first one that mentions it.
        for line in ssplit:
            if line.contains("bootstrap.recipe"):
                name = parseJson(line.split("= ")[1].split(";")[0])["byline"].getStr
                break
        if name == "placeholder":
            echo "Yea we tried everything but we cant find the author name"
            return

    # Quantity and ingredient markers are paired by position; only pairs
    # that exist in both lists are used.
    for i in 0 ..< min(quantityLines.len, ingredientLines.len):
        # findsubstring reports "nothing found" as -1, which is not an index.
        if quantityLines[i] < 0 or ingredientLines[i] < 0:
            continue
        let quantityAt = quantityLines[i] + 1
        let ingredientAt = ingredientLines[i] + 1
        # A marker on the very last line has no following text line.
        if quantityAt >= ssplit.len or ingredientAt >= ssplit.len:
            continue

        let quantityText = ssplit[quantityAt].strip()
        let ingredients = ssplit[ingredientAt].strip()

        if quantityText.len == 0:
            outputrecipe.add(Recipeline(fancy : initFancyAmount("Unspecified Amount of", "none"), ingredients: ingredients))
            continue

        var amount : string
        if quantityText.contains("frac"):
            # Named fraction entity such as "&frac12;", optionally preceded
            # by a whole number ("1 &frac12;"). The two characters after
            # "frac" are numerator and denominator; one more character (the
            # terminator) must follow them, otherwise the line is skipped.
            let tail = quantityText.split("frac")[1]
            if tail.len < 3:
                continue
            let fraction = tail[0] & "/" & tail[1]
            if quantityText[0] == '&':
                amount = fraction
            else:
                amount = quantityText.split(" ")[0] & " " & fraction
        elif quantityText[0] == '&':
            # Numeric entity: only the ones listed in htmlconversion are
            # understood; anything else is skipped.
            if quantityText notin htmlconversion[]:
                continue
            amount = htmlconversion[][quantityText]
        else:
            amount = quantityText

        outputrecipe.add(Recipeline(fancy : initFancyAmount(amount, detectMeasure(ingredients)), ingredients: ingredients))

    return newRecipe(name, author, url, outputrecipe, img)

proc geturl(input : string) : seq[string] =
    ## Extracts the links worth following from the HTML text `input`.
    ##
    ## The result holds, in this order:
    ## * the `href` of every `<a>` whose value starts with "/tag/";
    ## * the `data-url` of every `<article>` whose value is at least ten
    ##   characters long and has "/recipes/" within its first ten characters.
    ##
    ## Elements without the attribute, or with a value that is too short, are
    ## skipped.
    # The parsed tree is an ordinary managed value; it does not have to be
    # parked in manually allocated memory.
    let page = parseHtml(input)

    # Tag pages. attr() returns "" when the attribute (or the whole attribute
    # table) is missing, so no lookup can fail here.
    for node in page.findAll("a"):
        let href = node.attr("href")
        if href.startsWith("/tag/"):
            result.add(href)

    # Recipe pages.
    # NOTE(review): the ten-character window is one wider than "/recipes/",
    # so a value with a single character in front of it also matches. Kept
    # as-is so the set of collected URLs does not change.
    for node in page.findAll("article"):
        let target = node.attr("data-url")
        if target.len >= 10 and "/recipes/" in target[0 .. 9]:
            result.add(target)
proc recursive(uwu : string)  {.thread.} =
    ## Worker body: downloads the page at path `uwu` below
    ## https://cooking.nytimes.com/, queues every link `geturl` finds on it
    ## into `urlChannel`, and, when the path contains "recipes/", also parses
    ## the page and queues the recipe into `recipeMaster`.
    ##
    ## Paths containing "%" are ignored. Any error is echoed and ends the work
    ## for this path only.
    # Decide before the client exists, so the early exit has nothing to clean up.
    if uwu.contains("%"):
        return
    let client = newHttpClient()
    try:
        let webread = client.getContent("https://cooking.nytimes.com/"&uwu)
        let links = geturl(webread)

        if uwu.contains("recipes/"):
            let recipe = printrecipes(uwu, webread.split("\n"), client)
            # printrecipes returns nil when it cannot build a recipe; such a
            # result is dropped instead of being queued.
            if recipe != nil:
                recipeMaster[].send(recipe)

        for link in links:
            urlChannel[].send(link)

    except Exception as e:
        # Deliberately broad: page parsing can also fail with defects, and one
        # bad page must not take the worker thread down.
        echo e.msg
    finally:
        # Runs on every path out of the try block, so the client is always closed.
        client.close()
#the channels to communicate stuff

proc scanwebsite() =
    ## Takes URLs from `urlChannel` through the `rec` iterator and spawns
    ## `recursive` for each one, echoing the running count of spawned URLs.
    ##
    ## `rec` does not raise when the queue is empty: it keeps polling, so URLs
    ## queued by the workers are picked up by this same loop, and it ends by
    ## itself once its idle limit is reached. "writing now" is echoed after
    ## the loop has ended.
    var dispatched = 0
    try:
        for url in rec(urlChannel):
            spawn recursive(url)
            inc dispatched
            echo dispatched
        echo "writing now"

    except CatchableError as e:
        # Only recoverable errors are handled here, and the cause is printed
        # instead of being hidden.
        echo "uwu"
        echo e.msg

proc writeToFile() =
    ## Moves the recipes currently queued in `recipeMaster` into a seq and
    ## writes that seq as JSON to the relative path "recipes.json".
    # A channel cannot be serialised directly, so it is emptied into a seq.
    var holder : seq[Recipe]
    # The count is sampled once. This proc is the only receiver on the
    # channel, so at least that many messages are waiting.
    for _ in 1 .. recipeMaster[].peek:
        # recv waits for the channel instead of giving up the way tryRecv
        # can; a failed tryRecv would have stored an empty recipe and left
        # the real one behind in the channel.
        holder.add(recipeMaster[].recv())
    writeFile("recipes.json", $toJson(holder))
proc main() =
    ## Runs the crawl, then writes the collected recipes to disk.
    scanwebsite()
    writeToFile()

# Only start the crawl when this file is the program being run.
when isMainModule:
    main()