305 lines
No EOL
12 KiB
Nim
305 lines
No EOL
12 KiB
Nim
import nimpy
|
|
import httpclient
|
|
import q
|
|
import os
|
|
import xmltree
|
|
import sugar
|
|
import sequtils
|
|
import strutils
|
|
import strformat
|
|
import std/db_sqlite
|
|
import std/jsonutils,json
|
|
import random
|
|
import math
|
|
import asyncdispatch
|
|
import schedules
|
|
import times
|
|
# Seed the global random number generator once at start-up.
randomize()

# "processing" is a scratch directory: drop whatever an earlier run left
# behind and recreate it empty. "archive" is kept between runs and is only
# created when it does not exist yet.
removeDir("processing")
createDir("processing")
discard existsOrCreateDir("archive")

# Single shared database handle; `videos` stores one numeric id plus a JSON
# text blob per row.
let db = open("tiktok.db", "", "", "")
db.exec(sql"CREATE TABLE IF NOT EXISTS videos (id INTEGER NOT NULL, json LONGTEXT NOT NULL)")
|
|
|
|
proc getTotalVideos(tag : string) : int =
  ## Returns the number of rows stored in the table named `tag`.
  ##
  ## `tag` is spliced into the SQL text as a table name (identifiers cannot
  ## be bound as `?` parameters), so it must come from a trusted source.
  ## A table that does not exist surfaces as the database error raised by
  ## the query.
  # Let SQLite do the counting instead of fetching and discarding every row.
  let count = db.getValue(sql(fmt"SELECT COUNT(*) FROM {tag}"))
  result = if count.len == 0: 0 else: parseInt(count)
|
|
|
|
proc isVideoInDb(alt : string) : bool =
  ## Reports whether `alt` occurs as a substring of the second column (the
  ## stored JSON text) of any row in `videos`.
  # The whole result set is read before scanning; the query is never
  # abandoned half-way through.
  let stored = db.getAllRows(sql"SELECT * FROM videos")
  stored.anyIt(it[1].contains(alt))
|
|
|
|
proc inAnyInList(a : openArray[string], b : string) : bool =
  ## True when the normalized form of `b` is contained in the normalized
  ## form of at least one entry of `a` (`strutils.normalize` lower-cases and
  ## strips underscores).
  let needle = normalize(b)
  a.anyIt(normalize(it).contains(needle))
|
|
|
|
proc getRights(a : (string, string, HttpClient, ptr Channel[(string, string, int, seq[string])], int)) {.thread.} =
  ## Looks up the rights holders of a song on the BMI repertoire search page
  ## and posts the answer on the result channel.
  ##
  ## `a` is `(song title, artist, HTTP client, result channel, index)`; the
  ## index is passed through untouched so the receiver can match the answer
  ## to its video. Exactly one `(title, artist, index, holders)` message is
  ## sent. A title of "N/A" is answered immediately with an empty list.
  ##
  ## Raises `OSError` (without sending anything) when every request attempt
  ## fails.
  let (title, performer, client, results, index) = a
  if title == "N/A":
    results[].send((title, performer, index, newSeq[string]()))
    return

  # NOTE(review): only spaces are escaped here; a title or artist containing
  # `&`, `#` or similar characters yields a malformed query string.
  let songName = title.replace(" ", "+")
  let artist = performer.replace(" ", "+")
  # The site offers no API. These far-future cookies are sent with every
  # search (presumably to get past its disclaimer gate -- confirm).
  client.headers = newHttpHeaders({"Cookie" : "disc=2100-02-28T08%3A23%3A00.199Z; RepInstance=instance=REP2&expires=2/28/2100 5:13:54 AM"})
  let searchUrl = fmt"https://repertoire.bmi.com/Search/Search?SearchForm.View_Count=&SearchForm.Main_Search=Title&SearchForm.Main_Search_Text={songName}&SearchForm.Sub_Search=Performer&SearchForm.Sub_Search_Text={artist}&SearchForm.Search_Type=all"

  # The site fails intermittently, so the request is attempted up to eleven
  # times with a growing pause (500 ms, 550 ms, ... 950 ms) in between.
  const lastAttempt = 10
  var page = ""
  var fetched = false
  for attempt in 0 .. lastAttempt:
    try:
      page = client.request(searchUrl).body
      fetched = true
      break
    except CatchableError:
      if attempt == lastAttempt:
        # Nothing left to retry; fall through to the failure below.
        break
      sleep int(math.floor(500.0 * (1 + (attempt / 10))))
      echo "trying again..."
  if not fetched:
    raise newException(OSError, fmt"we where unable to get repertoire data for {title} by {performer}")

  let doc = q(page)
  # Two page layouts are handled: the usual one lists rights holders as
  # `a.expander` links; the other only has plain table cells, of which the
  # first one is used.
  let standard = doc.select("a.expander").map(x => innerText(x))
  let cells = doc.select("table.style-01 tbody tr td")
  let holders =
    if standard.len == 0 and cells.len > 0:
      @[innerText(cells[0]).strip()]
    else:
      standard
  results[].send((title, performer, index, holders))
|
|
|
|
proc processVideo(a : (int, string, HttpClient)) {.thread.} =
  ## Downloads one video. `a` is `(index, url, client)`; the response body
  ## of a request to `url` is written to `processing/<index>.mp4`.
  let (index, url, client) = a
  let destination = fmt"processing/{index}.mp4"
  let payload = client.request(url).body
  writeFile(destination, payload)
|
|
|
|
# Metadata kept for one video. Instances are serialized to JSON and stored in
# the `videos` table, so the declaration order of the fields is significant.
type videoDetails = object of RootObj
  artist, song : string
  hasSong : bool
  url, author, pageFound, altText : string
  tags, copyright : seq[string]
|
|
|
|
# One scheduled job: how often it runs, which restrictions apply when picking
# videos, and the (name, count) pairs to fetch and to generate.
type IntervalRoutine = object of RootObj
  weeks, days, hours, minutes, seconds : int
  restrictHasSong, restrictRights : bool
  rightsCriteria : seq[string]
  fetch, generate : seq[(string, int)]
|
|
|
|
proc makeRoutine(weeks, days, hours, minutes, seconds : int; fetch, generate : seq[(string, int)],
                 restrictHasSong, restrictRights : bool, rightsCriteria : seq[string]) : IntervalRoutine =
  ## Bundles the interval, the fetch/generate plans and the restriction
  ## settings into an `IntervalRoutine`.
  ##
  ## Every entry of `rightsCriteria` is stored in normalized form so that
  ## small spelling differences do not get in the way of matching.
  result = IntervalRoutine(
    weeks : weeks,
    days : days,
    hours : hours,
    minutes : minutes,
    seconds : seconds,
    restrictHasSong : restrictHasSong,
    restrictRights : restrictRights,
    rightsCriteria : rightsCriteria.mapIt(normalize(it)),
    fetch : fetch,
    generate : generate)
|
|
|
|
proc makeVideoDetails(videoIn : (string, string, string), song, author, inputUrl : string, copyright : seq[string]) : videoDetails =
  ## Builds the metadata record for one video.
  ##
  ## * `videoIn` is `(url, alt text, uploader)`.
  ## * `song` and `author` are stored as the record's `song` and `artist`;
  ##   a `song` of "N/A" means no song was identified.
  ## * `inputUrl` is the page the video was found on.
  ## * `copyright` is stored as given.
  ##
  ## Tags are the space-separated words of the alt text that start with `#`.
  ## One extra tag records the source page: `#topic<name>` for a URL with a
  ## `topics/<name>` part, `#topicmain` for the TikTok front page.
  let (url, altText, uploader) = videoIn
  var tags = altText.split(" ").filterIt(it.startsWith('#'))

  if inputUrl.contains("topics"):
    # Guard against a URL that mentions "topics" without a "topics/<name>"
    # part; indexing the split result blindly would fail on it.
    let parts = inputUrl.split("topics/")
    if parts.len > 1:
      tags.add(fmt"#topic{parts[1]}")
  elif inputUrl == "https://tiktok.com/":
    tags.add("#topicmain")

  # `hasSong` is true when a song WAS identified. It used to be computed as
  # `song == "N/A"`, so rows written before this fix carry the inverted value.
  result = videoDetails(
    artist : author,
    song : song,
    hasSong : song != "N/A",
    url : url,
    author : uploader,
    pageFound : inputUrl,
    altText : altText,
    tags : tags,
    copyright : copyright)
|
|
|
|
proc isSafeTableName(name : string) : bool =
  ## True when `name` can be spliced into SQL as a bare table name: it is
  ## non-empty and consists only of ASCII letters, digits, `_`, `$` or
  ## non-ASCII bytes.
  const allowed = {'a'..'z', 'A'..'Z', '0'..'9', '_', '$', '\x80'..'\xff'}
  name.len > 0 and allCharsInSet(name, allowed)

proc getVideosWithSongs(url : string, target : int) =
  ## Fetches videos from `url`, downloads the ones not yet in the database,
  ## identifies their songs, looks up the rights holders and archives the
  ## results.
  ##
  ## `url` and `target` are handed to the Python helper `getVideosOnPage`
  ## loaded from `sel.py`. Each archived video gets a row in `videos`, one
  ## row in a per-hashtag lookup table, and its file is moved from
  ## `processing/<index>.mp4` to `archive/<id>.mp4`.
  type
    RightsMessage = (string, string, int, seq[string])
    RightsArgs = (string, string, HttpClient, ptr Channel[RightsMessage], int)

  let module = readFile("sel.py")
  discard pyBuiltinsModule().exec(module)

  let getVidsOnPage = pyGlobals()["getVideosOnPage"].to(proc(a : string, b : int) : seq[(string, string, string)] {.gcsafe.})
  let idSong = pyGlobals()["idSong"].to(proc(a : string) : seq[(string, string, int)] {.gcsafe.})

  # NOTE(review): this single client is handed to every worker thread below
  # and used by them concurrently; confirm that is safe for HttpClient.
  let client = newHttpClient()
  client.headers = newHttpHeaders({"Referer" : "https://www.tiktok.com/"})

  let videos = getVidsOnPage(url, target).filter(x => not isVideoInDb(x[1]))
  if videos.len == 0:
    return

  # One download thread per video. The seq holds exactly `videos.len`
  # threads; sizing it with `videos.high` left the last video without a slot.
  var downloads = newSeq[Thread[(int, string, HttpClient)]](videos.len)
  var started = 0
  for i in 0 .. videos.high:
    try:
      createThread(downloads[i], processVideo, (i, videos[i][0], client))
      inc started
    except CatchableError:
      # Could not start another thread; carry on with the ones running.
      break
  # Join only threads that were really started.
  for i in 0 ..< started:
    joinThread(downloads[i])

  let songs = idSong("./processing/").map(x => (x[0].replace("\\", ""), x[1].replace("\\", ""), x[2]))

  # One rights lookup thread per identified song, all reporting on a single
  # shared channel.
  let endChannel = createShared(Channel[RightsMessage])
  endChannel[].open()
  var lookups = newSeq[Thread[RightsArgs]](songs.len)
  started = 0
  for i in 0 .. songs.high:
    try:
      createThread(lookups[i], getRights, (songs[i][0], songs[i][1], client, endChannel, songs[i][2]))
      inc started
    except CatchableError:
      break
  for i in 0 ..< started:
    joinThread(lookups[i])

  # Every sender has finished, so drain until the channel reports empty
  # (no extra receive that would yield a blank message), then release it.
  var answers : seq[RightsMessage]
  while true:
    let received = endChannel[].tryRecv()
    if not received.dataAvailable:
      break
    answers.add(received.msg)
  endChannel[].close()
  deallocShared(endChannel)

  # New ids continue after the rows already present.
  var nextId = getTotalVideos("videos")

  for answer in answers:
    let (songTitle, songArtist, index, holders) = answer
    # The index comes back from the song identifier; ignore anything that
    # does not refer to one of this batch's videos.
    if index < 0 or index > videos.high:
      continue
    var video : videoDetails
    try:
      video = makeVideoDetails(videos[index], songTitle, songArtist, url, holders)
    except CatchableError:
      continue
    if isVideoInDb(videos[index][1]):
      continue
    db.exec(sql"INSERT INTO videos (id, json) VALUES (?, ?)", nextId, $toJson(video))
    for hashtag in video.tags:
      # Drop the leading '#'; the remainder names the lookup table.
      let table = hashtag[1 .. ^1]
      # Hashtags are untrusted text and table names cannot be bound as
      # parameters, so anything but a plain identifier is rejected.
      if not isSafeTableName(table):
        echo table
        continue
      try:
        db.exec(sql(fmt"CREATE TABLE IF NOT EXISTS {table} (lookup INTEGER NOT NULL)"))
        db.exec(sql(fmt"INSERT INTO {table} (lookup) VALUES (?)"), nextId)
      except CatchableError:
        echo table
    moveFile(fmt"processing/{index}.mp4", fmt"archive/{nextId}.mp4")
    echo fmt"processing/{index}.mp4; archive/{nextId}.mp4"
    inc nextId
|
|
#for x in db.fastRows(sql(fmt"SELECT * FROM {tag} WHERE lookup LIKE {rand(total)}")):
|
|
|
|
proc generateRandomVideos(tag : string, amount : int, start = true, ending = true, routine : IntervalRoutine) : seq[string] =
  ## Picks up to `amount` distinct random videos from the lookup table `tag`
  ## and queues them for concatenation.
  ##
  ## * `start`: when true `concat.txt` is overwritten, otherwise the new
  ##   entries are appended to it.
  ## * `ending`: when true ffmpeg renders `output.mp4` from `concat.txt`
  ##   and every file in `processing/` is moved back to `archive/`.
  ## * `routine`: supplies the song / rights restrictions.
  ##
  ## Returns the chosen entries in `concat.txt` form
  ## (`file processing/<id>.mp4`). Fewer than `amount` entries are returned,
  ## after printing a warning, when the restrictions leave too few
  ## candidates. Raises `Exception` when the table holds fewer than `amount`
  ## rows.
  let total = getTotalVideos(tag)
  if amount > total:
    raise newException(Exception, "Cannot provide enough unique videos for the amount given due to there not being enough videos.")

  # Ids of the files currently sitting in processing/; those are skipped.
  let pending = collect:
    for entry in walkDir("processing/"):
      extractFilename(entry.path).split(".")[0]
  echo "going"

  var choices : seq[string]
  for row in db.fastRows(sql(fmt"SELECT * FROM {tag}")):
    let id = row[0]
    if id in pending:
      continue
    let candidate = fmt"file processing/{id}.mp4"
    if candidate in choices:
      continue
    if routine.restrictHasSong or routine.restrictRights:
      let stored = db.getValue(sql"SELECT json FROM videos WHERE id = ?", id)
      if stored.len == 0:
        # No metadata to check the restrictions against: leave it out.
        continue
      let meta = parseJson(stored)
      # Skip videos in which a song was identified.
      if routine.restrictHasSong and meta["artist"].getStr != "N/A":
        continue
      # Skip videos whose metadata mentions a restricted term.
      # NOTE(review): the criteria apply whenever either restriction is
      # enabled; `restrictRights` alone is never consulted here. Confirm
      # that is intended.
      let flattened = normalize($meta)
      if routine.rightsCriteria.anyIt(flattened.contains(it)):
        continue
    choices.add(candidate)

  # Shuffle and take a prefix: always terminates and never indexes past the
  # candidate list, unlike drawing `rand(total)` repeatedly.
  shuffle(choices)
  if choices.len < amount:
    echo "NOT ENOUGH VIDEOS TO SUSTAIN >:O, PICK LESS VIDEOS OR LESS RESTRICTIONS"
  let wanted = max(0, min(amount, choices.len))
  result = choices[0 ..< wanted]

  # "file processing/<id>.mp4" -> "<id>.mp4"
  let ids = result.map(x => x.split("/")[1])
  if start:
    writeFile("concat.txt", result.join("\n"))
  else:
    let concat = readFile("concat.txt") & "\n"
    writeFile("concat.txt", concat & result.join("\n"))
  echo ids
  for name in ids:
    echo name
    moveFile(fmt"archive/{name}", fmt"processing/{name}")
  discard execShellCmd("./normalizeFramerate.sh normalize")
  if ending:
    discard execShellCmd("ffmpeg -f concat -i concat.txt -c copy -y output.mp4")
    # List the directory first so it is not modified while being walked.
    for entry in toSeq(walkDir("processing/")):
      let name = extractFilename(entry.path)
      moveFile(entry.path, fmt"archive/{name}")
|
|
|
|
proc doRoutine(routine : IntervalRoutine) {.gcsafe.} =
  ## Runs one full cycle of `routine`: fetch every configured source,
  ## generate from every configured tag, then run `./callback.sh`.
  for source in routine.fetch:
    getVideosWithSongs(source[0], source[1])
    echo source

  # Each entry of `generate` contributes its own videos to one combined
  # output: the first call starts a fresh concat list and the last call
  # renders it. (Every iteration used to read the last entry.)
  let last = routine.generate.high
  for i, step in routine.generate:
    discard generateRandomVideos(step[0], step[1], i == 0, i == last, routine)

  discard execShellCmd("./callback.sh")
|
|
proc Main() =
  ## Reads `routine.json`, runs the routine once immediately and then
  ## schedules it to repeat at the configured interval.
  echo "Doing what I must because I can :D"
  let routineJson = parseJson(readFile("routine.json"))
  echo "a"

  # Interval parts first, then the plan, in the order the file is consulted.
  let
    wk = routineJson["Weeks"].getInt
    dy = routineJson["Days"].getInt
    hr = routineJson["Hours"].getInt
    mn = routineJson["Minutes"].getInt
    sc = routineJson["Seconds"].getInt
  let fetchPlan = collect:
    for entry in routineJson["Routine"]["Fetch"]:
      (entry[0].getStr, entry[1].getInt)
  let generatePlan = collect:
    for entry in routineJson["Routine"]["Generate"]:
      (entry[0].getStr, entry[1].getInt)
  let
    noSongs = routineJson["Routine"]["Restrictions"]["RestrictHasSong"].getBool
    checkRights = routineJson["Routine"]["Restrictions"]["RestrictRights"].getBool
  let criteria = collect:
    for entry in routineJson["Routine"]["Restrictions"]["RightsCriteria"]:
      entry.getStr

  let routine = makeRoutine(wk, dy, hr, mn, sc, fetchPlan, generatePlan,
                            noSongs, checkRights, criteria)

  # Per the original author's note, the scheduler does not cope with
  # zero-valued interval parts, so the whole interval is collapsed into one
  # total number of seconds before it is handed over.
  let gap = initDuration(weeks = routine.weeks, days = routine.days, hours = routine.hours,
                         minutes = routine.minutes, seconds = routine.seconds)
  let second = gap.inSeconds()

  doRoutine(routine)
  schedules:
    every(seconds=int(second), async=true):
      doRoutine(routine)
|
|
|
|
# Start the bot only when this file is compiled as the program itself, not
# when it is imported by another module.
when isMainModule:
  Main()