Add Octothorpe indexing script

This commit is contained in:
jultty 2026-05-02 22:37:34 -03:00
commit 8a8ab76e50
2 changed files with 69 additions and 0 deletions

13
scripts/indexed.cache Normal file
View file

@ -0,0 +1,13 @@
https://blog.jutty.dev/posts/half-an-year-on-alpine/
https://blog.jutty.dev/posts/self-hosting-patch/
https://blog.jutty.dev/posts/notice-on-rss-feeds/
https://blog.jutty.dev/posts/unwinding/
https://blog.jutty.dev/posts/introducing-tori/
https://blog.jutty.dev/posts/void-on-zfs/
https://blog.jutty.dev/posts/meeting-the-bsd-family/
https://blog.jutty.dev/notes/tags/
https://blog.jutty.dev/notes/feed-and-links-updates/
https://blog.jutty.dev/notes/waypipe/
https://blog.jutty.dev/notes/shells-timeline/
https://blog.jutty.dev/notes/enjoying-alpine/
https://blog.jutty.dev/notes/notes/

56
scripts/octothorpe-index.sh Executable file
View file

@ -0,0 +1,56 @@
#!/usr/bin/env sh
# Looks up blog URLs taken from the site's Atom feed on octothorp.es and
# submits the ones that are not indexed there yet.
#
# Requires: curl, jq.
set -eu

# Anchor file paths to the directory holding this script, so the cache and
# the feed are found no matter which directory the script is started from.
script_dir=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)

# Base URL that feed entry ids must start with.
root=https://blog.jutty.dev

# One URL per line; URLs listed here are skipped on later runs.
cache=$script_dir/indexed.cache

# Seconds to wait after each lookup: a limit of 10 requests/minute allows one
# request every 6 seconds, and this adds 1 second of margin.
request_pause=7

# Make sure the cache exists so it can be searched on a first run.
touch "$cache"

# Whole Atom feed, read once up front.
feed=$(cat -- "$script_dir/../public/atom.xml")
# filter KIND
#
# Print, one per line, the URL inside every <id> element of $feed that lives
# under "$root/KIND/", leaving out anything whose line contains "/drafts/".
#
# Globals:   feed (read), root (read), kind (written)
# Arguments: $1 - site section to select, e.g. "posts" or "notes"
# Outputs:   matching URLs on stdout; nothing when no entry matches
filter() {
  kind="$1"
  # [[:space:]] is used instead of \s because \s is a GNU extension that
  # POSIX basic regular expressions do not define.
  printf '%s' "$feed" | grep "<id>$root/$kind/.*</id>" |
    grep -v '/drafts/' | sed 's|[[:space:]]*<id>\(.*\)</id>|\1|'
}
# fetch URL
#
# Query octothorp.es for the pages it has recorded for URL and write curl's
# output (the response body) to stdout, then wait $request_pause seconds so
# consecutive lookups stay spaced out.
#
# Globals:   request_pause (read), url (written)
# Arguments: $1 - page URL to look up
# Outputs:   response body on stdout
# Returns:   status of the last command run; curl's -f makes HTTP error
#            responses count as failures
fetch() {
  url="$1"
  # -G turns the --data-urlencode payload into the query string, so a URL
  # containing '&', '#', '+' or spaces reaches the API as a single value.
  curl -sSLf -G --data-urlencode "s=$url" \
    "https://octothorp.es/get/pages/posted"
  sleep "$request_pause"
}
# index URLS
#
# For each URL in URLS:
#   - skip it when it is already listed in the cache file;
#   - otherwise look it up with fetch; when the lookup reports a non-zero
#     length, record the URL in the cache;
#   - otherwise POST it to the octothorp.es index endpoint and record it in
#     the cache when the reply's "status" field is "success".
#
# Globals:   cache (read; file appended to), url/urls/response/length (written)
# Arguments: $1 - whitespace-separated list of page URLs
# Outputs:   one or two progress lines per URL on stdout
index() {
  urls="$1"
  # Left unquoted on purpose: the list is split on whitespace into URLs.
  for url in $urls; do
    # -x requires a whole-line match, so a URL that is merely a prefix of a
    # cached one is not mistaken for cached; -- guards a leading '-'.
    if grep -qxF -- "$url" "$cache"; then
      echo Cached: "$url"
    else
      response=$(fetch "$url")
      # NOTE(review): assumes the reply has a single top-level value whose
      # length is the number of known pages - confirm against the API.
      length=$(printf '%s' "$response" | jq '.[] | length')
      if [ "$length" -gt 0 ]; then
        echo Already indexed: "$url"
        printf '%s\n' "$url" >> "$cache"
      else
        echo Indexing: "$url"
        # --data-urlencode percent-encodes the value so the body really is
        # valid application/x-www-form-urlencoded data.
        response=$(curl -sSLf -X POST https://octothorp.es/index \
          -H "Origin: https://blog.jutty.dev" \
          -H "Content-Type: application/x-www-form-urlencoded" \
          --data-urlencode "uri=$url"
        )
        if [ "$(printf '%s' "$response" | jq -r .status)" = success ]; then
          echo Indexed: "$url"
          printf '%s\n' "$url" >> "$cache"
        else
          echo Error: "$response"
        fi
      fi
    fi
  done
}
# Handle the feed one site section at a time: posts first, then notes.
for section in posts notes; do
  index "$(filter "$section")"
done
# Disabled: the links section is not processed.
#links=$(filter links)