Electron-Svelte-Recipe-Planner/NimRecipe/oldcode/NimRecipe.nim

import httpclient
import strtabs  # To access XmlAttributes
import os       # To use splitFile
import strutils # To use cmpIgnoreCase
import tables
import json
import htmlparser
import std/xmltree
import std/jsonutils
import asyncdispatch
import threadpool
import random
# Seed the default random number generator once at start-up; cdbs() draws its block ids from rand().
randomize()
# Normally I like to use this order:
    # templates
    # iterators
    # functions
    # classes
    # vars
    # but I want these allocation vars to be easy to find, because they will need to be edited constantly.


# THIS IS INVALID CODE? IT WORKS FOR A WHILE, BUT THAT DOESN'T CHANGE THE FACT THAT NIM DOESN'T REALLY WORK WELL WITHOUT A GC.
# THIS WAS AN ATTEMPT AT MULTITHREADING, AND ALL MY OTHER ATTEMPTS HAVE FAILED.
# THIS HAS SOME COOL IDEAS THAT I WISH TO ARCHIVE IN CASE I NEED TO COME BACK TO IT.


# Registry of manually allocated pointers, grouped under a float block id.
# The slot holding the table reference lives in shared memory so that every
# thread reaches the same table through this pointer.
# `createShared` takes an element COUNT (default 1), not a byte size, so a
# single slot is requested; the table is created right away because a nil
# TableRef cannot be indexed.
# NOTE(review): the table itself is still a managed `ref` and nothing here
# synchronises concurrent access to it.
var pointerBlocks* = createShared(TableRef[float, seq[pointer]])
pointerBlocks[] = newTable[float, seq[pointer]]()


template organizedPtr(x: untyped, a  : untyped, c : float) =
    ## Declares a local variable `x` initialised with the value of the
    ## expression `a`, and parks a heap copy of that value in block `c` of
    ## `pointerBlocks`.
    ##
    ## `x` is a plain value (a copy of the heap slot), not a pointer.
    ## The block `c` must already exist in `pointerBlocks`.
    # `create` takes an element count, so one slot of the expression's type is
    # requested instead of `sizeof(a)` slots.
    let y = create(typeof(a))
    # Evaluate `a` exactly once and store it; without this assignment the slot
    # (and therefore `x`) would only ever hold the zero value of the type.
    y[] = a
    pointerBlocks[][c].add(y)
    var x = y[]


template organizedPtr(x : untyped, a  : typedesc, c : float) =
    ## Declares a local variable `x` that is a `ptr` to one freshly allocated,
    ## zero-initialised slot of type `a`, and registers that pointer in block
    ## `c` of `pointerBlocks`.
    ##
    ## The block `c` must already exist in `pointerBlocks`.
    # `create` takes an element count, so a single slot is requested instead of
    # `sizeof(a)` slots.
    var x = create(a)
    pointerBlocks[][c].add(x)

# You can probably do this automatically with macros when a proc executes, but for now it just exists on its own.
# This creates an id block, so you can free all pointers with a single command:
# create an id, use it to look the block up, then free it, etc.
proc cdbs() : int =
    ## Creates a new pointer-block id.
    ##
    ## Allocates a shared float, fills it with a random value drawn from
    ## `rand(100000000.0)`, appends the float's own pointer to the entry of
    ## `pointerBlocks` stored under that value, and returns the value.
    ##
    ## NOTE(review): the declared return type is `int`, but the returned
    ## expression `id[]` is a `float`, and the callers in this file pass the
    ## result on to `float` parameters -- confirm the intended type.
    ## NOTE(review): `pointerBlocks[][id[]]` reads an entry that this proc has
    ## not created -- presumably this fails for a fresh key; verify.
    # NOTE(review): never read anywhere in this proc.
    var intholder : seq[int]

    # NOTE(review): the second argument of createShared is presumably an
    # element count rather than a byte size -- confirm.
    var id = createShared(float, sizeof(float))
    id[] = rand(100000000.0)
    pointerBlocks[][id[]].add(id)
    return id[]


template `!`(a: ptr) =
  ## Prefix shorthand that hands the typed pointer `a` to `dealloc`.
  dealloc(a)

proc fcdbs(x: float) =
    ## Frees every pointer registered under block id `x` and then removes the
    ## block from `pointerBlocks`.
    ##
    ## An id that is not (or no longer) registered is a no-op, so calling this
    ## twice for the same id does not free anything a second time.
    ##
    ## NOTE(review): every entry is released with `dealloc`, although the id
    ## slot itself is allocated with `createShared` elsewhere in this file --
    ## confirm that the allocator in use accepts that.
    for blockPtr in pointerBlocks[].getOrDefault(x):
        # Release the stored pointer itself, not the address of the loop
        # variable that merely holds a copy of it.
        dealloc(blockPtr)
    # Drop the entry so the table keeps no dangling pointers.
    pointerBlocks[].del(x)


# So this idea is a bit weird:
# I'm going to make memory blocks of pointers, keyed by random numbers, to stop race conditions except in very rare situations.


type
  fancyamount* = ref object of RootObj
    ## Quantity part of one recipe line.
    amountnum*: string  ## amount text, e.g. a fraction such as "1/2"
    measure*: string    ## unit word taken from `measures`, or "none"

  Recipeline* = ref object of RootObj
    ## One ingredient line of a recipe.
    fancy*: fancyamount   ## how much of the ingredient
    ingredients*: string  ## ingredient text of the line

  Recipe* = ref object
    ## A scraped recipe.
    name*: string
    author*: string
    url*: string
    lines*: seq[Recipeline]  ## the ingredient lines
    img*: string             ## `src` of the recipe picture


# Measurement words that can be recognised in an ingredient line. Only imperial
# units are listed, because metric opens a whole other can of worms and isn't
# used in NYTC.
# The lists below are reached through raw pointers into shared memory so that
# they can be used from procs running on other threads.
# `createShared` takes an element COUNT (default 1), not a byte size, so a
# single slot is requested for each of them.
var measures* = createShared(seq[string])
measures[] = @["handful", "cup", "cups", "pound", "pounds", "ounce", "ounces", "tablespoons", "tablespoon", "teaspoon", "teaspoons"]
# Numeric HTML entities of the vulgar-fraction characters -> plain "a/b" text.
var htmlconversion* = createShared(TableRef[string, string])
htmlconversion[] = {"&#8539;": "1/8", "&#188;" : "1/4",
                    "&#189;" : "1/2", "&#190;" : "3/4",
                    "&#8531;" : "1/3", "&#8532;" : "2/3",
                    "&#8533;" : "1/5", "&#8534;" : "2/5",
                    "&#8535;" : "3/5",
                    "&#8536;" : "4/5", "&#8537;" : "1/6",
                    "&#8538;" : "5/6", "&#8540;" : "3/8",
                    "&#8541;" : "5/8", "&#8542;" : "7/8"
                    }.newTable()
# Work list of site-relative urls; it starts with the four seasonal tag pages.
var urlMaster* = createShared(seq[string])
urlMaster[] = @["tag/spring", "tag/summer", "tag/winter", "tag/fall"]
# Shared list that collects the scraped recipes. `createShared` takes an
# element count (default 1), not a byte size, so one slot is requested.
var recipeMaster = createShared(seq[Recipe])
# Start-up debug output: prints `sizeOf` of the `urlMaster` pointer itself.
echo urlMaster.sizeOf()

proc bootlegGC() {.thread.} =
    ## Thread body that never returns: it prints `sizeOf` of the seq behind
    ## `urlMaster`, sleeps for 100 ms, and repeats.
    while true:
        echo sizeOf(urlMaster[])
        sleep(100)

iterator `...`*[T](a: T, b: T): T =
  ## Yields every value from `a` up to and including `b`, stepping with `inc`.
  ## Nothing is yielded when `a > b`.
  if a <= b:
    var cur = a
    while true:
      yield cur
      # Stop BEFORE stepping past `b`, so that `b == high(T)` neither raises an
      # overflow (signed types) nor wraps around forever (unsigned types).
      if cur >= b:
        break
      inc cur


proc initFancyAmount(a : string, b :string ) : fancyamount =
    ## Builds a `fancyamount` whose `amountnum` is `a` and whose `measure` is `b`.
    result = fancyamount(amountnum: a, measure: b)


iterator rec(a: ptr seq[string]) : ptr string =
    ## Walks the list behind `a` from the front while it may still be growing.
    ##
    ## Every entry -- including the last one, and entries appended after the
    ## iteration started -- is yielded exactly once, as a pointer to a
    ## temporary copy of the string. The copy is released as soon as the loop
    ## body hands control back, so a consumer that wants to keep the text
    ## (for example by passing it to another thread) has to copy it first.
    ##
    ## The list is polled every 10 ms. Once more than 30000 consecutive polls
    ## find nothing new (each of those waits a further 10 ms), the iteration
    ## ends. An empty list is simply treated as "nothing new yet".
    const
        pollMs = 10
        maxIdlePolls = 30000
    var next = 0       # index of the next entry to hand out
    var idlePolls = 0  # consecutive polls that found nothing new
    while idlePolls <= maxIdlePolls:
        sleep pollMs
        # Compare against the live length instead of copying the whole list on
        # every poll; `next < len` also covers the last entry and an empty list.
        if next < a[].len:
            idlePolls = 0
            # `create` takes an element count, so ask for exactly one slot.
            let slot = create(string)
            slot[] = a[][next]
            yield slot
            dealloc slot
            inc next
        else:
            sleep pollMs
            inc idlePolls


proc newRecipe(name : string, author : string, url : string, lines : seq[RecipeLine], img : string) : Recipe =
    ## Wraps the five given values in a freshly allocated `Recipe`.
    result = Recipe(
        name: name,
        author: author,
        url: url,
        lines: lines,
        img: img)

proc findsubstring(input : seq[string], substring : string) : seq[int] =
    ## Returns the zero-based indices of all lines of `input` that contain
    ## `substring`, in ascending order.
    ##
    ## When no line matches (including when `input` is empty) the result is
    ## `@[-1]`, which callers use as the "not found" marker.
    # No pointer-block id is requested here: this proc allocates nothing by
    # hand, so there would be nothing to register or free under it.
    for lineNo, line in input:
        if line.contains(substring):
            result.add(lineNo)
    if result.len == 0:
        result.add(-1)
# I remember there being a module that does this, but I can't find it, so we're stuck with the manual version.

proc printrecipes(url : ptr string, ssplit: ptr seq[string]) : Recipe =
    ## Builds a `Recipe` from one downloaded page.
    ##
    ## `url` points at the page address, which is copied into the result at
    ## the end. `ssplit` points at the page source split into lines: the lines
    ## are joined and handed to `parseHtml`, and they are also scanned directly
    ## for the `quantity` / `ingredient-name` span markers.
    ##
    ## All working storage is declared through `organizedPtr` under one block
    ## id obtained from `cdbs()`, and `fcdbs(id)` is called before returning.
    ##
    ## Returns without setting a result (after an `echo`) when no byline could
    ## be found.
    var id = cdbs()
    organizedPtr img, string, id


    organizedPtr html, parseHtml(ssplit[].join("")), id
    # NOTE(review): `utputrecipe` is not used again in this proc; it looks like
    # a mistyped duplicate of `outputrecipe` declared a few lines below.
    organizedPtr utputrecipe, seq[Recipeline], id
    # Indices of the lines that carry the quantity / ingredient-name markers.
    organizedPtr quanity, findsubstring(ssplit[], """<span class="quantity">"""), id
    organizedPtr ingredient, findsubstring(ssplit[], """<span class="ingredient-name">"""), id
    # NOTE(review): `author` is never assigned in this proc before `author[]`
    # is read in the final `newRecipe` call; the byline text goes into `name`.
    organizedPtr author, string, id
    organizedPtr name, "placeholder", id
    organizedPtr outputrecipe, seq[Recipeline], id
    # Defines the image: the `src` of every <img> inside a <picture> is
    # assigned in turn, so the last one found is the one that is kept.
    for a in html.findAll("picture"):
        for a in a.findAll("img"):
            img[] = a.attrs["src"]

    for node in html.findAll("p"):
        # Because htmlparser doesn't include classes, we have to just search the string.
        if ($node).contains("card-byline"):
            # They only have one item, so we just define it like this.
            for items in node.items:
                name = $items
            # Only the first matching <p> is looked at.
            break

    if name == "placeholder":
        # Sometimes the earlier method doesn't work, but it's always in the JS, so we just parse the original for it.
        for lines in ssplit[]:
            if lines.contains("bootstrap.recipe"):
                # Takes the text between "= " and ";" on that line, parses it as JSON and reads its "byline" field.
                name = parseJson(lines.split("= ")[1].split(";")[0])["byline"].getStr
            # NOTE(review): this `break` is at loop-body level, not inside the
            # `if`, so the loop stops after the first line whether or not it
            # matched -- confirm whether that is intended.
            break
        if name == "placeholder":
            # If there's nothing I can do, I just do this.
            echo "Yea we tried everything but we cant find the author name"
            # NOTE(review): this path returns without calling fcdbs(id).
            return
    # This code can probably be optimized with htmlparsing but uh, yea no.
    # This is for assembling the recipes...
    # NOTE(review): the loop runs over the indices of `quanity` but also
    # indexes `ingredient[i]` -- presumably both lists have the same length; verify.
    for i in 0...len(quanity)-1:
        organizedPtr amount, string, id
        # The actual text is taken from the line AFTER the marker line.
        organizedPtr quanitystring, ssplit[quanity[i]+1].strip(), id
        organizedPtr ingredients, ssplit[ingredient[i]+1].strip(), id
        # Returns the first entry of `measures` contained in the normalized
        # input, or "none" when no entry matches.
        proc detectMeasure(input: string) : string =
            for x in measures[]:
                if normalize(input).contains(x):
                    return x
            return "none"
        case quanitystring:
            of "":
                # No quantity text at all.
                let recipe = Recipeline(fancy : initFancyAmount("Unspecified Amount of", "none"), ingredients: ingredients)
                outputrecipe[].add(recipe)
                continue
            else:
                if quanitystring.contains("frac"):

                    # The text after "frac" has its last character dropped; the first two
                    # remaining characters become numerator and denominator
                    # (presumably entities shaped like `&frac12;` -- verify).
                    if quanitystring[0] == "&"[0]:
                        # Starts with '&': a bare fraction.
                        organizedPtr big, quanitystring.split("frac")[1][0 .. ^2][0], id
                        organizedPtr little, quanitystring.split("frac")[1][0 .. ^2][1], id
                        amount[] = big & "/" & little
                        organizedPtr recipe, Recipeline(fancy : initFancyAmount(amount[], detectMeasure(ingredients)), ingredients: ingredients), id
                        outputrecipe[].add(recipe)

                    else:
                        # Otherwise the text before the first space is kept as the whole-number part.
                        organizedPtr whole, quanitystring.split(" ")[0], id
                        organizedPtr big, quanitystring.split("frac")[1][0 .. ^2][0], id
                        organizedPtr little, quanitystring.split("frac")[1][0 .. ^2][1], id
                        amount[] =  whole & " " & big & "/" & little
                        organizedPtr recipe, Recipeline(fancy : initFancyAmount(amount[], detectMeasure(ingredients)), ingredients: ingredients), id
                        outputrecipe[].add(recipe)

                elif quanitystring[0] == "&"[0]:
                    # Any other entity is looked up in `htmlconversion`.
                    try:
                        amount[] = htmlconversion[][quanitystring]
                        organizedPtr recipe, Recipeline(fancy : initFancyAmount(amount[], detectMeasure(ingredients)), ingredients: ingredients), id
                        outputrecipe[].add(recipe)
                    except:
                        # NOTE(review): every failure in the block above is dropped
                        # silently and the line is skipped.
                        discard "asd"

                else:
                    # Plain text quantity: used as-is.
                    amount[] = ssplit[quanity[i]+1].strip()
                    var recipe = Recipeline(fancy : initFancyAmount(amount[], detectMeasure(ingredients)), ingredients: ingredients)
                    outputrecipe[].add(recipe)

    # NOTE(review): `author`, `outputrecipe` and `img` were registered under
    # `id`, and they are read on the next line AFTER fcdbs(id) has run -- if
    # fcdbs releases them, these reads touch freed memory; verify.
    fcdbs(id)
    return newRecipe(name, author[], url[], outputrecipe[], img[])

proc geturl(input : string) : seq[string] =
    ## Extracts the site-relative links worth following from the HTML in `input`.
    ##
    ## Returned first are the `href` values of all `<a>` elements that start
    ## with "/tag/", then the `data-url` values of all `<article>` elements
    ## that start with "/recipes/", each group in document order.
    ## Elements without the attribute (or without any attributes) are skipped.
    # The parsed tree is an ordinary managed value; no manual allocation needed.
    let page = parseHtml(input)
    # This is for the tags... obviously.
    for node in page.findAll("a"):
        # `attr` yields "" for a missing attribute and for attribute-less
        # elements, so no exception handling is needed for either case.
        let url = node.attr("href")
        if url.startsWith("/tag/"):
            result.add(url)
    # This is for recipes.
    for node in page.findAll("article"):
        let url = node.attr("data-url")
        if url.startsWith("/recipes/"):
            result.add(url)
proc recursive(uwu : ptr)  {.thread.} =
    ## Worker for one site-relative url (`uwu` points at the url text).
    ##
    ## Urls containing "%" are skipped. Otherwise the page is downloaded from
    ## "https://cooking.nytimes.com/"; if the url contains "recipes/", the page
    ## is turned into a recipe with `printrecipes` and appended to
    ## `recipeMaster`; finally every link `geturl` finds on the page that is
    ## not yet in `urlMaster` is appended to it.
    ##
    ## Any exception is reported with `echo` and ends this worker only.
    ##
    ## NOTE(review): the appends to `recipeMaster` and `urlMaster` are not
    ## guarded by a lock although several workers may run at once.
    # Checked before the client is created, so skipping a url leaks nothing.
    if uwu[].contains("%"):
        return
    let client = newHttpClient()
    try:
        # Ordinary locals replace the hand-allocated boxes: they are cleaned up
        # on every exit path, including when an exception is raised.
        var webread = client.getContent("https://cooking.nytimes.com/" & uwu[]).split("\n")
        if uwu[].contains("recipes/"):
            let recipe = printrecipes(uwu, addr webread)
            # printrecipes gives back nil when it cannot find a byline.
            if recipe != nil:
                recipeMaster[].add(recipe)
        for urlx in geturl(webread.join("")):
            if urlx notin urlMaster[]:
                urlMaster[].add(urlx)
    except Exception as e:
        # Deliberately broad: one malformed page must not stop the crawl.
        echo e.msg
    finally:
        # Always release the connection, also on failure.
        client.close()

#the channels to communicate stuff

proc scanwebsite() =
    ## Starts the background `bootlegGC` thread, then spawns one `recursive`
    ## worker for every url that `rec` yields from `urlMaster`.
    ##
    ## `rec` is an iterator that keeps polling the list, so urls appended by
    ## the workers are picked up as well; the loop ends when `rec` stops
    ## yielding (it ends by itself rather than by raising).
    spawn bootlegGC()
    echo "yea this should be going"
    try:
        for url in rec(urlMaster):
            spawn recursive(url)
        echo "writing now"
    except:
        # Deliberate best-effort: whatever goes wrong while dispatching, the
        # caller still gets to run its next step.
        echo "uwu"

proc writeToFile() =
    ## Dumps the crawl results: the urls of `urlMaster`, one per line, go to
    ## "beta.txt", and the JSON form of `recipeMaster` goes to "recipes.json".
    let urlDump = join(urlMaster[], "\n")
    writeFile("beta.txt", urlDump)
    let recipeDump = $toJson(recipeMaster)
    writeFile("recipes.json", recipeDump)
proc main() =
    ## Program entry point: crawl the site first, then write the results to disk.
    scanwebsite()
    writeToFile()
# Only start the crawl when this file is the program being run, not when it is imported.
when isMainModule:
    main()