> scrape a room URL and rewrite JML to serve local assets
```bash
$ ./janusxr --scrape https://www.janusxr.org/newlobby/index.html mydir
🔗 http://dizzket.com/archive/dotmatrix/
🔗 https://vesta.janusvr.com/nazrin/minecraft-sandbox
✅ http://www.janusvr.com/newlobby/scripts/home.txt
...

$ ls -la mydir
index.html
home.txt
...
```
This commit is contained in:
parent
fde184608f
commit
832ab6e2f4
2 changed files with 81 additions and 19 deletions
26
README.md
26
README.md
|
|
@ -4,12 +4,17 @@ Portable swiss-army knife to automate [janusXR](https://janusxr.org) / JML thing
|
||||||
|
|
||||||
> *What is [janusXR](https://janusxr.org)?* It's an open, user-operated immersive web layer, open-sourced by the JanusVR company, that seamlessly lives within websites, even 12 years later thanks to [janusweb](https://github.com/jbaicoianu/janusweb). It provides a highly viable and easy-to-adopt ecosystem of portals, enabling immersive experiences that challenge the traditional app store paradigm. Get started with [this guide](https://madjin.github.io/janus-guide/#/).
|
> *What is [janusXR](https://janusxr.org)?* It's an open, user-operated immersive web layer, open-sourced by the JanusVR company, that seamlessly lives within websites, even 12 years later thanks to [janusweb](https://github.com/jbaicoianu/janusweb). It provides a highly viable and easy-to-adopt ecosystem of portals, enabling immersive experiences that challenge the traditional app store paradigm. Get started with [this guide](https://madjin.github.io/janus-guide/#/).
|
||||||
|
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: ./janusxr --health <room_url> [--max-time-per-asset 5]
|
||||||
|
./janusxr --scrape <room_url> <outdir>
|
||||||
|
```
|
||||||
|
## Examples
|
||||||
|
|
||||||
> scan a room URL for broken links in JML+HTML
|
> scan a room URL for broken links in JML+HTML
|
||||||
|
|
||||||
```
|
```bash
|
||||||
$ ./janusxr --health http://localhost:8790/models/m5gr26w0wqqs
|
$ ./janusxr --health http://localhost:8790/models/m5gr26w0wqqs
|
||||||
|
|
||||||
✅ http://localhost:8791/templates/xrfragment/%232/website.glb
|
✅ http://localhost:8791/templates/xrfragment/%232/website.glb
|
||||||
|
|
@ -28,7 +33,22 @@ $ ./janusxr --health http://localhost:8790/models/m5gr26w0wqqs
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
# Awk?
|
> scrape a room URL and rewrite JML to serve local assets
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ ./janusxr --scrape https://www.janusxr.org/newlobby/index.html mydir
|
||||||
|
🔗 http://dizzket.com/archive/dotmatrix/
|
||||||
|
🔗 https://vesta.janusvr.com/nazrin/minecraft-sandbox
|
||||||
|
✅ http://www.janusvr.com/newlobby/scripts/home.txt
|
||||||
|
...
|
||||||
|
|
||||||
|
$ ls -la mydir
|
||||||
|
index.html
|
||||||
|
home.txt
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Awk?
|
||||||
|
|
||||||
Why not some superfancy scripting for this task?
|
Why not some superfancy scripting for this task?
|
||||||
|
|
||||||
|
|
|
||||||
74
janusxr
74
janusxr
|
|
@ -1,7 +1,8 @@
|
||||||
#!/usr/bin/env -S awk -f
|
#!/usr/bin/env -S awk -f
|
||||||
|
|
||||||
function usage() {
|
function usage() {
|
||||||
print "Usage: ./janusxr --health <room_url> \n"
|
print "Usage: ./janusxr --health <room_url> [--max-time-per-asset 5] \n" \
|
||||||
|
" ./janusxr --scrape <room_url> <outdir>\n"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -11,15 +12,15 @@ function usage() {
|
||||||
BEGIN {
|
BEGIN {
|
||||||
if (ARGC < 2) usage()
|
if (ARGC < 2) usage()
|
||||||
command = ARGV[1]
|
command = ARGV[1]
|
||||||
if (command == "--health") {
|
if (command == "--health" || command == "--scrape") {
|
||||||
health()
|
scrape(command)
|
||||||
} else {
|
} else {
|
||||||
printf("Unknown command: %s\n", command)
|
printf("Unknown command: %s\n", command)
|
||||||
usage()
|
usage()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function health( tmpfile, line, attr, check, u) {
|
function scrape( arg, tmpfile, line, attr, check, u) {
|
||||||
url = ARGV[2]
|
url = ARGV[2]
|
||||||
if (url == "") {
|
if (url == "") {
|
||||||
print "❌ Missing URL argument."
|
print "❌ Missing URL argument."
|
||||||
|
|
@ -46,18 +47,59 @@ function health( tmpfile, line, attr, check, u) {
|
||||||
close(tmpfile)
|
close(tmpfile)
|
||||||
|
|
||||||
# Check each extracted links
|
# Check each extracted links
|
||||||
nlinks = 0
|
if( arg == "--health" ){
|
||||||
nlinksok = 0
|
maxtime = ARGV[3]
|
||||||
for (u in links) {
|
if ( maxtime == "" ) maxtime = 5
|
||||||
if( substr(u,1,1) == "/" ) u = rooturl""u
|
nlinks = 0
|
||||||
check = "curl -I -s \"" u "\" > /dev/null"
|
nlinksok = 0
|
||||||
if (system(check) == 0){
|
for (u in links) {
|
||||||
nlinksok++
|
if( substr(u,1,1) == "/" ) u = rooturl""u
|
||||||
printf("✅ %s\n", u)
|
check = "curl -L --max-time "maxtime" -I -s \"" u "\" > /dev/null"
|
||||||
}else printf("❌ %s\n", u)
|
if (system(check) == 0){
|
||||||
nlinks+=1
|
nlinksok++
|
||||||
|
printf("✅ %s\n", u)
|
||||||
|
}else printf("❌ %s\n", u)
|
||||||
|
nlinks+=1
|
||||||
|
}
|
||||||
|
print "⚕️ health: "(( 100/nlinks )*nlinksok)"%"
|
||||||
|
if( nlinks != nlinksok ) exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if( arg == "--scrape" ) {
|
||||||
|
outdir = ARGV[3]
|
||||||
|
if ( outdir == "" ) outdir = "."
|
||||||
|
system("mkdir "outdir" || true ")
|
||||||
|
system("cp "tmpfile" "outdir"/index.html")
|
||||||
|
|
||||||
|
for (u in links) {
|
||||||
|
if( substr(u,1,1) == "/" ) u = rooturl""u
|
||||||
|
check = "curl -L --max-time 20 -I -s \"" u "\" > /dev/null"
|
||||||
|
if (system(check) == 0 && has_non_html_ext(u) ){
|
||||||
|
n = split(u, fileparts, "/")
|
||||||
|
filepart = fileparts[n]
|
||||||
|
outfile = outdir"/"filepart
|
||||||
|
system("curl -L --max-time 20 -s \"" u "\" > "outfile)
|
||||||
|
system("sed -i 's|"u"|"filepart"|g' "outdir"/index.html")
|
||||||
|
nlinksok++
|
||||||
|
printf("✅ %s\n", u)
|
||||||
|
}else printf("🔗 %s\n", u)
|
||||||
|
nlinks+=1
|
||||||
|
}
|
||||||
}
|
}
|
||||||
print "⚕️ health: "(( 100/nlinks )*nlinksok)"%"
|
|
||||||
if( nlinks != nlinksok ) exit(1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Function: has_non_html_ext
|
||||||
|
# Returns 1 if the file has an extension after at least one slash and it is not "html"
|
||||||
|
# Returns 0 otherwise
|
||||||
|
function has_non_html_ext(file, arr) {
|
||||||
|
# Check for at least one slash
|
||||||
|
if (file !~ /\//) return 0
|
||||||
|
|
||||||
|
# Match extension after last dot that is not a slash
|
||||||
|
if (match(file, /\.([^.\/]+)$/, arr)) {
|
||||||
|
ext = arr[1]
|
||||||
|
if (ext != "html") return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue