#!/usr/bin/env -S awk -f
# Print CLI usage for every subcommand and exit with a non-zero status.
# Called both for missing arguments and for unknown commands.
function usage() {
    print "Usage: ./janusxr health <room_url> [--max-time-per-asset 5] \n" \
          " ./janusxr scrape <room_url> <outdir>\n" \
          " ./janusxr optimize <room_url>\n"
    exit 1
}
# ------------------------------
# Core dispatcher
# ------------------------------
BEGIN {
    # Need at least a subcommand name
    if (ARGC < 2) usage()
    command = ARGV[1]
    # All three subcommands share the fetch/extract step inside scrape(),
    # which then branches on the command name.
    if (command == "health" || command == "scrape" || command == "optimize") {
        scrape(command)
    } else {
        printf("Unknown command: %s\n", command)
        usage()
    }
}
# Fetch the room page at ARGV[2], extract every src="..."/url="..." link,
# then run the requested subcommand over those links:
#   health   - HEAD-check each link, print per-link status and a percentage
#   scrape   - download reachable non-HTML assets into ARGV[3] and rewrite
#              index.html to point at the local copies
#   optimize - print <a>/<link rel='preconnect'> hints to paste into the page
# Everything after 'arg' in the parameter list is an AWK-style local.
function scrape(arg,    url, rooturl, tmpfile, cmd, line, attr, links,
                        check, u, maxtime, nlinks, nlinksok, outdir,
                        n, fileparts, filepart, outfile, urlpart,
                        domain, domains) {
    url = ARGV[2]
    if (url == "") {
        print "❌ Missing URL argument."
        usage()
    }
    # Base URL (page URL minus its last path segment) for resolving
    # root-relative "/..." links.
    rooturl = url
    sub(/\/[^\/]+$/, "", rooturl)
    tmpfile = "/tmp/out.html"
    # Fetch HTML page using curl
    cmd = "curl -s \"" url "\" -o " tmpfile
    if (system(cmd) != 0) { print "❌ Failed to fetch " url; exit 1; }
    # Parse attributes from src= or url=; the inner loop walks one text line,
    # consuming matches left to right so multiple links per line are caught.
    while ((getline line < tmpfile) > 0) {
        while (match(line, /(src|url)="[^"]+"/)) {
            attr = substr(line, RSTART, RLENGTH)
            sub(/^[^=]+="/, "", attr)
            sub(/"$/, "", attr)
            links[attr] = 1
            line = substr(line, RSTART + RLENGTH)
        }
    }
    close(tmpfile)
    # --- health: HEAD-check each extracted link -------------------------
    if (arg == "health") {
        maxtime = ARGV[3]
        if (maxtime == "") maxtime = 5
        nlinks = 0
        nlinksok = 0
        for (u in links) {
            if (substr(u, 1, 1) == "/") u = rooturl u
            check = "curl -L --max-time " maxtime " -I -s \"" u "\" > /dev/null"
            if (system(check) == 0) {
                nlinksok++
                printf("✅ %s\n", u)
            } else printf("❌ %s\n", u)
            nlinks += 1
        }
        # Guard the percentage against division by zero on a link-less page
        if (nlinks > 0)
            print "⚕️ health: " ((100 / nlinks) * nlinksok) "%"
        else
            print "⚕️ health: no links found"
        if (nlinks != nlinksok) exit(1)
    }
    # --- scrape: mirror index.html plus reachable non-HTML assets -------
    if (arg == "scrape") {
        outdir = ARGV[3]
        if (outdir == "") outdir = "."
        # -p succeeds when the directory already exists (replaces "|| true")
        system("mkdir -p \"" outdir "\"")
        system("cp " tmpfile " \"" outdir "/index.html\"")
        for (u in links) {
            if (substr(u, 1, 1) == "/") u = rooturl u
            check = "curl -L --max-time 20 -I -s \"" u "\" > /dev/null"
            if (system(check) == 0 && has_non_html_ext(u)) {
                n = split(u, fileparts, "/")
                filepart = fileparts[n]
                outfile = outdir "/" filepart
                system("curl -L --max-time 20 -s \"" u "\" > \"" outfile "\"")
                # Rewrite the original link to the local filename.
                # NOTE(review): a URL containing '|' or a quote would break
                # this sed expression — confirm expected inputs.
                system("sed -i 's|" u "|" filepart "|g' \"" outdir "/index.html\"")
                nlinksok++
                printf("✅ %s\n", u)
            } else printf("🔗 %s\n", u)
            nlinks += 1
        }
    }
    # --- optimize: print preload/preconnect hints ------------------------
    if (arg == "optimize") {
        printf("<!-- copy/paste below into your HTML/JML-file -->\n")
        for (u in links) {
            if (substr(u, 1, 1) == "/") u = rooturl u
            check = "curl -L --max-time 20 -I -s \"" u "\" > /dev/null"
            if (system(check) == 0 && has_non_html_ext(u)) {
                printf("<a href='" u "'/>\n")
                split(u, urlpart, "/")
                # urlpart[1] = "http(s):", urlpart[3] = host
                domain = urlpart[1] "//" urlpart[3]
                # One preconnect per unique origin; 'in' avoids creating
                # empty array entries the way a bare lookup would.
                if (!(domain in domains)) {
                    printf("<link rel='preconnect' href='" domain "'/>\n")
                    domains[domain] = 1
                }
            }
        }
    }
}
# Function: has_non_html_ext
# Returns 1 if the file has an extension after at least one slash and it is not "html"
# Returns 0 otherwise
# Returns 1 if 'file' contains at least one slash and ends in a dot-extension
# that is not "html"; returns 0 otherwise. 'ext' is an AWK-style local.
# Portable rewrite: the original used gawk-only 3-arg match(s, re, arr),
# which fails under POSIX awk/mawk despite the generic "env -S awk" shebang.
function has_non_html_ext(file,    ext) {
    # Require at least one slash so bare names are rejected
    if (file !~ /\//) return 0
    # Extension = text after the last dot, with no dot or slash inside it,
    # so "a/b.tar.gz" yields "gz" and "a/b.c/d" yields nothing.
    if (match(file, /\.[^.\/]+$/)) {
        ext = substr(file, RSTART + 1, RLENGTH - 1)
        if (ext != "html") return 1
    }
    return 0
}