> scrape a room URL and rewrite JML to serve local assets

```bash
$ ./janusxr --scrape https://www.janusxr.org/newlobby/index.html mydir
🔗 http://dizzket.com/archive/dotmatrix/
🔗 https://vesta.janusvr.com/nazrin/minecraft-sandbox
✅ http://www.janusvr.com/newlobby/scripts/home.txt
...
$ ls -la mydir
index.html
home.txt
...
```

commit 832ab6e2f4 (parent fde184608f)
2 changed files with 81 additions and 19 deletions

README.md | 26

````diff
@@ -4,12 +4,17 @@ Portable swiss-army knife to automate [janusXR](https://janusxr.org) / JML thing
 
 > *What is [janusXR](https://janusxr.org)?* It's an open, user-operated immersive web layer, open-sourced by the JanusVR company, that seamlessly lives within websites, even 12 years later thanks to [janusweb](https://github.com/jbaicoianu/janusweb). It provides a highly viable and easy-to-adopt ecosystem of portals, enabling immersive experiences that challenge the traditional app store paradigm. Get started with [this guide](https://madjin.github.io/janus-guide/#/).
 
 # Usage
 
 ```
+Usage: ./janusxr --health <room_url> [--max-time-per-asset 5]
+       ./janusxr --scrape <room_url> <outdir>
 ```
 
 ## Examples
 
 > scan a room URL for broken links in JML+HTML
 
-```
+```bash
 $ ./janusxr --health http://localhost:8790/models/m5gr26w0wqqs
 
 ✅ http://localhost:8791/templates/xrfragment/%232/website.glb
````
````diff
@@ -28,7 +33,22 @@ $ ./janusxr --health http://localhost:8790/models/m5gr26w0wqqs
 ```
 
-# Awk?
+> scrape a room URL and rewrite JML to serve local assets
+
+```bash
+$ ./janusxr --scrape https://www.janusxr.org/newlobby/index.html mydir
+🔗 http://dizzket.com/archive/dotmatrix/
+🔗 https://vesta.janusvr.com/nazrin/minecraft-sandbox
+✅ http://www.janusvr.com/newlobby/scripts/home.txt
+...
+
+$ ls -la mydir
+index.html
+home.txt
+...
+```
+
+## Awk?
 
 Why not some superfancy scripting for this task?
 
````

janusxr | 52

```diff
@@ -1,7 +1,8 @@
 #!/usr/bin/env -S awk -f
 
 function usage() {
-  print "Usage: ./janusxr --health <room_url> \n"
+  print "Usage: ./janusxr --health <room_url> [--max-time-per-asset 5] \n" \
+        "       ./janusxr --scrape <room_url> <outdir>\n"
   exit 1
 }
 
```
```diff
@@ -11,15 +12,15 @@ function usage() {
 BEGIN {
   if (ARGC < 2) usage()
   command = ARGV[1]
-  if (command == "--health") {
-    health()
+  if (command == "--health" || command == "--scrape") {
+    scrape(command)
   } else {
     printf("Unknown command: %s\n", command)
     usage()
   }
 }
 
-function health( tmpfile, line, attr, check, u) {
+function scrape( arg, tmpfile, line, attr, check, u) {
   url = ARGV[2]
   if (url == "") {
     print "❌ Missing URL argument."
```
```diff
@@ -46,11 +47,14 @@ function health( tmpfile, line, attr, check, u) {
   close(tmpfile)
 
   # Check each extracted links
+  if( arg == "--health" ){
+    maxtime = ARGV[3]
+    if ( maxtime == "" ) maxtime = 5
   nlinks = 0
   nlinksok = 0
   for (u in links) {
     if( substr(u,1,1) == "/" ) u = rooturl""u
-    check = "curl -I -s \"" u "\" > /dev/null"
+    check = "curl -L --max-time "maxtime" -I -s \"" u "\" > /dev/null"
     if (system(check) == 0){
       nlinksok++
       printf("✅ %s\n", u)
```
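The `--health` branch builds one `curl` HEAD request per extracted link, following redirects (`-L`) and bounded by the per-asset timeout (default 5 seconds). As a rough standalone sketch of that probe, with a hypothetical URL and a ❌ branch added here only for contrast:

```awk
#!/usr/bin/env -S gawk -f
# Sketch only: probe a single, hypothetical URL the way the --health branch does.
# A zero exit status from curl is reported as ✅; the ❌ line is this sketch's addition.
BEGIN {
  u = "https://example.com/asset.glb"   # hypothetical asset URL
  maxtime = 5                           # script default when --max-time-per-asset is omitted
  check = "curl -L --max-time " maxtime " -I -s \"" u "\" > /dev/null"
  if (system(check) == 0) printf("✅ %s\n", u)
  else                    printf("❌ %s\n", u)
}
```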
```diff
@@ -61,3 +65,41 @@ function health( tmpfile, line, attr, check, u) {
   if( nlinks != nlinksok ) exit(1)
 }
 
+  if( arg == "--scrape" ) {
+    outdir = ARGV[3]
+    if ( outdir == "" ) outdir = "."
+    system("mkdir "outdir" || true ")
+    system("cp "tmpfile" "outdir"/index.html")
+
+    for (u in links) {
+      if( substr(u,1,1) == "/" ) u = rooturl""u
+      check = "curl -L --max-time 20 -I -s \"" u "\" > /dev/null"
+      if (system(check) == 0 && has_non_html_ext(u) ){
+        n = split(u, fileparts, "/")
+        filepart = fileparts[n]
+        outfile = outdir"/"filepart
+        system("curl -L --max-time 20 -s \"" u "\" > "outfile)
+        system("sed -i 's|"u"|"filepart"|g' "outdir"/index.html")
+        nlinksok++
+        printf("✅ %s\n", u)
+      }else printf("🔗 %s\n", u)
+      nlinks+=1
+    }
+  }
+}
+
+# Function: has_non_html_ext
+# Returns 1 if the file has an extension after at least one slash and it is not "html"
+# Returns 0 otherwise
+function has_non_html_ext(file, arr) {
+  # Check for at least one slash
+  if (file !~ /\//) return 0
+
+  # Match extension after last dot that is not a slash
+  if (match(file, /\.([^.\/]+)$/, arr)) {
+    ext = arr[1]
+    if (ext != "html") return 1
+  }
+
+  return 0
+}
```
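`has_non_html_ext()` is what decides, together with the reachability check, whether a link is mirrored into `<outdir>` and rewritten in `index.html`, or merely reported with 🔗. A self-contained sketch exercising it on made-up URLs (run with gawk; the three-argument `match()` is a gawk extension):

```awk
#!/usr/bin/env -S gawk -f
# Sketch: exercise has_non_html_ext() on hypothetical inputs.
function has_non_html_ext(file,   arr, ext) {
  if (file !~ /\//) return 0                # no slash -> not treated as a fetchable path
  if (match(file, /\.([^.\/]+)$/, arr)) {   # extension after the last dot
    ext = arr[1]
    if (ext != "html") return 1             # non-html extension -> mirror it locally
  }
  return 0
}
BEGIN {
  print has_non_html_ext("https://example.com/models/chair.glb")   # 1: asset, gets downloaded
  print has_non_html_ext("https://example.com/room/index.html")    # 0: html, listed with 🔗 only
  print has_non_html_ext("home.txt")                               # 0: no slash in the string
}
```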