#!/bin/sh
#
# https://www.romanzolotarev.com/bin/rssg
# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# rssg - build an RSS 2.0 feed from an index page.
#
#   usage: rssg index.{html,md} title > rss.xml
#
# How the index page is read:
#   * feed URL:     href of the first line matching '<a .*rss.xml"'
#   * description:  text of the first <p> element
#   * items:        every line holding an anchor with both href="..." and
#                   title="..." attributes; the href names the article
#                   (resolved relative to the index file's directory) and
#                   the first word of the title attribute is handed to
#                   date(1) as the publication date.
#
# Requires a date(1) that understands -R and -d (e.g. GNU coreutils) and,
# for Markdown sources, lowdown(1).
#
# POSIX sh has no `local`, so helpers assign plain globals.  That is safe
# here only because every helper is invoked inside a command substitution;
# state that must not survive between loop iterations is reset explicitly.
set -e


# Entry point.
#   $1 - path to the index page (*html is read as-is, *md goes through
#        md_to_html)
#   $2 - feed title
# Writes the feed to stdout and a one-line summary to stderr.
# Exits 1 on bad usage, 2 when $1 is not a regular file.
main() {
	test -n "$1" || usage
	test -n "$2" || usage
	test -f "$1" || no_file "$1"

	index_file=$(readlink -f "$1")

	# Start empty so a value inherited from the environment cannot leak in.
	html=
	case "$index_file" in
		*html) html=$(cat "$index_file") ;;
		*md) html=$(md_to_html "$index_file") ;;
	esac
	test -n "$html" || usage

	# Directory of the index file; item hrefs are resolved against it.
	base=${index_file%/*}

	url=$(printf '%s\n' "$html" | get_url)
	# Feed URL minus its last path segment.
	base_url=$(printf '%s\n' "$url" | sed 's#\(.*\)/[^/]*#\1#')

	title=$2

	description=$(
		printf '%s\n' "$html" |
		get_description |
		remove_tags |
		remove_nbsp
	)

	items=$(printf '%s\n' "$html" | get_items)

	rss=$(
		printf '%s\n' "$items" |
		render_items "$base" "$base_url" |
		render_feed "$url" "$title" "$description"
	)

	# Quote the prefix so glob characters in the cwd are matched literally.
	rel_path=${index_file#"$(pwd)"/}
	# grep -c exits 1 on a zero count; keep that from tripping `set -e`.
	count=$(printf '%s\n' "$rss" | grep -c '<item>' || true)

	printf '%s\n' "[rssg] $rel_path $count items" >&2
	printf '%s\n' "$rss"
}


# Print the usage line to stderr and exit 1.
usage() {
	printf '%s\n' "usage: ${0##*/} index.{html,md} title > rss.xml" >&2
	exit 1
}


# Report that file $1 does not exist and exit 2.
no_file() {
	printf '%s\n' "${0##*/}: $1: No such file" >&2
	exit 2
}


# Render Markdown file $1 to HTML on stdout using lowdown.
# Exits 3 (with a message on stderr) when lowdown is not installed.
md_to_html() {
	if ! command -v lowdown >/dev/null 2>&1; then
		printf '%s\n' "${0##*/}: lowdown: command not found" >&2
		exit 3
	fi
	lowdown \
		-D html-skiphtml \
		-D smarty \
		-d metadata \
		-d autolink "$1"
}


# stdin: HTML.  stdout: the first line that starts with <h1 (any case),
# with all tags stripped.
get_title() {
	awk 'tolower($0)~/^<h1/{gsub(/<[^>]*>/,"",$0);print;exit}'
}


# stdin: HTML.  stdout: the href value taken from the first line that
# contains an anchor ending in rss.xml".
get_url() {
	grep -i '<a .*rss.xml"' | head -n 1 |
	sed 's#.*href="\(.*\)".*#\1#'
}


# stdin: HTML.  stdout: one line per anchor line carrying href and title
# attributes, rewritten to "<href> <title attribute> <link text>".
get_items() {
	grep -i 'href=".*" title="' |
	sed 's#.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*#\1 \2 \3#'
}


# stdin: HTML.  stdout: the content of the first <p>...</p> span.
# The awk range starts on the first line where an opening <p ...> tag can
# be stripped, ends on the line where a closing </p> can be stripped, and
# the x flag makes awk quit once that first span has been printed.
get_description() {
	start='sub("^.*<"s"*"t"("s"[^>]*)?>","")'
	stop='sub("</"s"*"t""s"*>.*","")&&x=1'
	awk -v 's=[[:space:]]' -v 't=[Pp]' "$start,$stop;x{exit}"
}


# Strip every <...> tag (opening and closing alike) from stdin.
remove_tags() {
	sed 's#<[^>]*>##g'
}


# Replace non-breaking spaces on stdin with plain spaces.
# Both the &nbsp; entity and the literal U+00A0 character (UTF-8 bytes
# C2 A0) are handled: &nbsp; is not one of XML's predefined entities, and
# the channel description is emitted outside CDATA.
remove_nbsp() {
	nbsp=$(printf '\302\240')
	sed "s#&nbsp;# #g;s#$nbsp# #g"
}


# Make src/href attributes on stdin absolute.
#   $1 - site URL, prepended to values that start with "/"
#   $2 - base URL, prepended to values containing neither ":" nor "/"
rel_to_abs_urls() {
	site_url="$1"
	base_url="$2"

	abs='s#(src|href)="/([^"]*)"#\1="'"$site_url"/'\2"#g'
	rel='s#(src|href)="([^:/"]*)"#\1="'"$base_url"/'\2"#g'
	sed -E "$abs;$rel"
}


# Escape "]]>" on stdin so the text can sit inside a CDATA section without
# closing it early (the sequence is split across two adjacent sections).
escape_cdata() {
	sed 's#]]>#]]]]><![CDATA[>#g'
}


# Print date string $1 in RFC 822/2822 form (relies on date -R and -d).
date_rfc_822() {
	date -Rd "$1"
}


# stdin: item lines as produced by get_items.
#   $1 - directory of the index file
#   $2 - base URL
# Emits one <item> per non-empty input line.
render_items() {
	while IFS= read -r i; do
		# An index without item links still yields one blank line here.
		test -n "$i" || continue
		render_item "$1" "$2" "$i"
	done
}


# Emit one <item> element.
#   $1 - directory of the index file
#   $2 - base URL
#   $3 - item line: "<href> <date> ..."
# The article body comes from the .md sibling of the href when present
# (rendered with md_to_html), otherwise from the file itself.  Items with
# no readable source are skipped with a warning on stderr.
render_item() {
	base=$1
	base_url=$2
	item=$3

	# scheme://host part of the base URL, used for root-relative links.
	site_url=$(printf '%s\n' "$base_url" | cut -d '/' -f1-3)

	url=$(printf '%s\n' "$item" | awk '{print $1}')
	date=$(printf '%s\n' "$item" | awk '{print $2}')

	f="$base/$url"

	# Reset first: this runs repeatedly in one subshell, so without the
	# reset a missing file would reuse the previous item's content.
	html=
	if test -f "${f%.html}.md"; then
		html=$(md_to_html "${f%.html}.md")
	elif test -f "$f"; then
		html=$(cat "$f")
	fi
	if test -z "$html"; then
		printf '%s\n' "[rssg] $url: no content found, item skipped" >&2
		return 0
	fi

	description=$(
		printf '%s\n' "$html" |
		rel_to_abs_urls "$site_url" "$base_url" |
		remove_nbsp
	)
	title=$(printf '%s\n' "$description" | get_title)
	guid="$base_url/${url#/}"

	printf '%s\n' \
		'' \
		'<item>' \
		"<guid>$guid</guid>" \
		"<link>$guid</link>" \
		"<pubDate>$(date_rfc_822 "$date")</pubDate>" \
		"<title>$title</title>" \
		'<description><![CDATA[' \
		'' \
		"$(printf '%s\n' "$description" | escape_cdata)" \
		'' \
		']]></description>' \
		'</item>'
}


# Wrap the <item> elements read from stdin in the channel envelope.
#   $1 - feed URL (also the source of the channel <link>)
#   $2 - channel title
#   $3 - channel description
render_feed() {
	url=$1
	title=$(printf '%s\n' "$2" | remove_nbsp)
	description=$3

	base_url=$(printf '%s\n' "$url" | cut -d '/' -f1-3)
	feed_items=$(cat)

	# lastBuildDate is the time the feed is generated; no item date is
	# visible here because items are rendered in a separate subshell.
	printf '%s\n' \
		'<?xml version="1.0" encoding="UTF-8"?>' \
		'<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">' \
		'<channel>' \
		"<atom:link href=\"$url\" rel=\"self\" type=\"application/rss+xml\" />" \
		"<title>$title</title>" \
		"<description>$description</description>" \
		"<link>$base_url/</link>" \
		"<lastBuildDate>$(date -R)</lastBuildDate>" \
		"$feed_items" \
		'</channel></rss>'
}


main "$@"