Skip to content

Commit 8c64076

Browse files
committed
Submit URLs to IndexNow
1 parent 053a659 commit 8c64076

1 file changed

Lines changed: 50 additions & 17 deletions

File tree

.github/workflows/scripts/index-now.sh

Lines changed: 50 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
SITE_URL="${SITE_URL:-https://learn-software.com}"
77
API_KEY="${API_KEY:-53f1811377874f608f161d768a9c0b78}"
88
KEY_LOCATION="$SITE_URL/$API_KEY.txt"
9+
SITEMAP_INDEX="public/sitemap.xml"
910

1011
# IndexNow endpoints
1112
INDEXNOW_API="https://api.indexnow.org/indexnow"
@@ -55,38 +56,70 @@ submit_to_indexnow() {
5556
# Main execution
5657
echo "=== Hugo IndexNow Automation ==="
5758

58-
# Check if sitemap exists
59-
if [ ! -f "public/sitemap.xml" ]; then
60-
echo "Error: sitemap.xml not found in public folder"
61-
echo "Make sure Hugo is configured to generate a sitemap"
62-
exit 1
59+
# Early validation
60+
if [ ! -f "$SITEMAP_INDEX" ]; then
61+
echo "Error: sitemap index not found at $SITEMAP_INDEX" >&2
62+
exit 1
6363
fi
6464

65-
# Extract URLs from sitemap
66-
echo "Extracting URLs from sitemap..."
67-
urls=$(grep -oP '(?<=<loc>)[^<]+' public/sitemap.xml | grep -v "\.xml$" | head -10000)
65+
# Extract sitemap paths
66+
sitemap_paths=$(awk -F'<loc>|</loc>' '/<loc>/{print $2}' "$SITEMAP_INDEX")
67+
if [ -z "$sitemap_paths" ]; then
68+
echo "No sitemap entries found in sitemap index" >&2
69+
exit 1
70+
fi
6871

69-
if [ -z "$urls" ]; then
70-
echo "No URLs found in sitemap"
71-
exit 1
72+
url_list=""
73+
while IFS= read -r sitemap_url; do
74+
relative=$(echo "$sitemap_url" | sed -E 's~https?://[^/]+/~~')
75+
local_path="public/$relative"
76+
if [ ! -f "$local_path" ]; then
77+
echo "Warning: missing $local_path, skipping…" >&2
78+
continue
79+
fi
80+
page_urls=$(awk -F'<loc>|</loc>' '/<loc>/{print $2}' "$local_path" | grep -v '\.xml$')
81+
url_list="${url_list}"$'\n'"${page_urls}"
82+
done <<< "$sitemap_paths"
83+
84+
# Sanitize: remove blanks and duplicates
85+
url_list=$(printf "%s\n" "$url_list" | sed '/^\s*$/d' | sort -u)
86+
if [ -z "$url_list" ]; then
87+
echo "No URLs found in any sitemap" >&2
88+
exit 1
7289
fi
7390

74-
url_count=$(echo "$urls" | wc -l)
75-
echo "Found $url_count URLs to submit"
91+
printf "Found %d URLs to submit.\n" "$(printf "%s\n" "$url_list" | wc -l)"
92+
93+
# Build a comma-separated list
94+
url_array=""
95+
first=true
96+
while IFS= read -r url; do
97+
# Skip empty lines
98+
[ -z "$url" ] && continue
7699

77-
# Convert URLs to JSON array format
78-
url_array=$(echo "$urls" | sed 's/.*/"&"/' | paste -sd ',' -)
100+
# Escape double quotes and backslashes
101+
esc_url=$(printf '%s' "$url" | sed 's/\\/\\\\/g; s/"/\\"/g')
102+
103+
if $first; then
104+
url_array="\"$esc_url\""
105+
first=false
106+
else
107+
url_array="$url_array, \"$esc_url\""
108+
fi
109+
done <<< "$url_list"
79110

80111
# Create JSON payload
81112
urls_json=$(cat << EOF
82113
{
83114
"host": "$SITE_URL",
84115
"key": "$API_KEY",
85116
"keyLocation": "$KEY_LOCATION",
86-
"urlList": [$url_array]
117+
"urlList": [ $url_array ]
87118
}
88119
EOF
89120
)
90121

122+
echo $urls_json
123+
91124
# Submit URLs
92-
submit_to_indexnow "$urls_json"
125+
# submit_to_indexnow "$urls_json"

0 commit comments

Comments
 (0)