|
6 | 6 | SITE_URL="${SITE_URL:-https://learn-software.com}" |
7 | 7 | API_KEY="${API_KEY:-53f1811377874f608f161d768a9c0b78}" |
8 | 8 | KEY_LOCATION="$SITE_URL/$API_KEY.txt" |
| 9 | +SITEMAP_INDEX="public/sitemap.xml" |
9 | 10 |
|
10 | 11 | # IndexNow endpoints |
11 | 12 | INDEXNOW_API="https://api.indexnow.org/indexnow" |
@@ -55,38 +56,70 @@ submit_to_indexnow() { |
55 | 56 | # Main execution |
56 | 57 | echo "=== Hugo IndexNow Automation ===" |
57 | 58 |
|
58 | | -# Check if sitemap exists |
59 | | -if [ ! -f "public/sitemap.xml" ]; then |
60 | | - echo "Error: sitemap.xml not found in public folder" |
61 | | - echo "Make sure Hugo is configured to generate a sitemap" |
62 | | - exit 1 |
| 59 | +# Early validation |
| 60 | +if [ ! -f "$SITEMAP_INDEX" ]; then |
| 61 | + echo "Error: sitemap index not found at $SITEMAP_INDEX" >&2 |
| 62 | + exit 1 |
63 | 63 | fi |
64 | 64 |
|
65 | | -# Extract URLs from sitemap |
66 | | -echo "Extracting URLs from sitemap..." |
67 | | -urls=$(grep -oP '(?<=<loc>)[^<]+' public/sitemap.xml | grep -v "\.xml$" | head -10000) |
| 65 | +# Extract sitemap paths |
| 66 | +sitemap_paths=$(awk -F'<loc>|</loc>' '/<loc>/{print $2}' "$SITEMAP_INDEX") |
| 67 | +if [ -z "$sitemap_paths" ]; then |
| 68 | + echo "No sitemap entries found in sitemap index" >&2 |
| 69 | + exit 1 |
| 70 | +fi |
68 | 71 |
|
69 | | -if [ -z "$urls" ]; then |
70 | | - echo "No URLs found in sitemap" |
71 | | - exit 1 |
| 72 | +url_list="" |
| 73 | +while IFS= read -r sitemap_url; do |
| 74 | + relative=$(echo "$sitemap_url" | sed -E 's~https?://[^/]+/~~') |
| 75 | + local_path="public/$relative" |
| 76 | + if [ ! -f "$local_path" ]; then |
| 77 | + echo "Warning: missing $local_path, skipping…" >&2 |
| 78 | + continue |
| 79 | + fi |
| 80 | + page_urls=$(awk -F'<loc>|</loc>' '/<loc>/{print $2}' "$local_path" | grep -v '\.xml$') |
| 81 | + url_list="${url_list}"$'\n'"${page_urls}" |
| 82 | +done <<< "$sitemap_paths" |
| 83 | + |
| 84 | +# Sanitize: drop blank lines and duplicates, then keep at most 10000 URLs (the cap the previous version applied with head) |
| 85 | +url_list=$(printf "%s\n" "$url_list" | sed '/^[[:space:]]*$/d' | sort -u | head -n 10000) |
| 86 | +if [ -z "$url_list" ]; then |
| 87 | + echo "No URLs found in any sitemap" >&2 |
| 88 | + exit 1 |
72 | 89 | fi |
73 | 90 |
|
74 | | -url_count=$(echo "$urls" | wc -l) |
75 | | -echo "Found $url_count URLs to submit" |
| 91 | +printf "Found %d URLs to submit.\n" "$(printf "%s\n" "$url_list" | wc -l)" |
| 92 | + |
| 93 | +# Build a comma-separated list |
| 94 | +url_array="" |
| 95 | +first=true |
| 96 | +while IFS= read -r url; do |
| 97 | + # Skip empty lines |
| 98 | + [ -z "$url" ] && continue |
76 | 99 |
|
77 | | -# Convert URLs to JSON array format |
78 | | -url_array=$(echo "$urls" | sed 's/.*/"&"/' | paste -sd ',' -) |
| 100 | + # Escape double quotes and backslashes |
| 101 | + esc_url=$(printf '%s' "$url" | sed 's/\\/\\\\/g; s/"/\\"/g') |
| 102 | + |
| 103 | + if $first; then |
| 104 | + url_array="\"$esc_url\"" |
| 105 | + first=false |
| 106 | + else |
| 107 | + url_array="$url_array, \"$esc_url\"" |
| 108 | + fi |
| 109 | +done <<< "$url_list" |
79 | 110 |
|
80 | 111 | # Create JSON payload |
81 | 112 | urls_json=$(cat << EOF |
82 | 113 | { |
83 | 114 | "host": "$SITE_URL", |
84 | 115 | "key": "$API_KEY", |
85 | 116 | "keyLocation": "$KEY_LOCATION", |
86 | | - "urlList": [$url_array] |
| 117 | + "urlList": [ $url_array ] |
87 | 118 | } |
88 | 119 | EOF |
89 | 120 | ) |
90 | 121 |
|
| 122 | +printf '%s\n' "$urls_json" # payload preview; quoted so whitespace and glob characters in URLs are printed verbatim |
| 123 | + |
91 | 124 | # Submit URLs |
92 | 125 | submit_to_indexnow "$urls_json" |
0 commit comments