shmage-site

scripts and documents that generate shmage.xyz
git clone git://git.shmage.xyz/shmage-site.git
Log | Files | Refs

rssg (3904B)


      1 #!/bin/sh
      2 #
      3 # https://www.romanzolotarev.com/bin/rssg
      4 # Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
      5 #
      6 # Permission to use, copy, modify, and/or distribute this software for any
      7 # purpose with or without fee is hereby granted, provided that the above
      8 # copyright notice and this permission notice appear in all copies.
      9 #
     10 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     11 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     12 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     13 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     14 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     15 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     16 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     17 #
     18 set -e
     19 
     20 
     # main <index.{html,md}> <title>
     #
     # Entry point. Reads the index page named by $1 (HTML as-is, Markdown
     # through md_to_html), extracts the feed URL, the description and the item
     # list from it, and writes the complete RSS document to stdout. A one-line
     # summary with the number of items goes to stderr.
     #
     # Exits through usage (status 1) when an argument is missing or the index
     # is neither *html nor *md, and through no_file (status 2) when $1 is not a
     # regular file.
     main () {
       test -n "$1" || usage
       test -n "$2" || usage
       test -f "$1" || no_file "$1"

       index_file=$(readlink -f "$1")

       # Start from an empty value so that an `html` variable inherited from
       # the environment can never be mistaken for the page content.
       html=''
       case "$index_file" in
         *html) html=$(cat "$index_file") ;;
         *md) html=$(md_to_html "$index_file") ;;
       esac
       test -n "$html" || usage

       # Directory holding the index; item paths are resolved against it.
       base="${index_file%/*}"

       # The feed URL is looked up once; its directory part is the base URL
       # under which the items are published.
       url=$(printf '%s\n' "$html" | get_url)
       base_url="${url%/*}"

       title="$2"

       description=$(
         printf '%s\n' "$html" |
           get_description |
           remove_tags |
           remove_nbsp
       )

       items=$(printf '%s\n' "$html" | get_items)

       rss=$(
         printf '%s\n' "$items" |
           render_items "$base" "$base_url" |
           render_feed "$url" "$title" "$description"
       )

       # "$PWD" is quoted inside the expansion so that it is stripped as a
       # literal prefix rather than being interpreted as a glob pattern.
       # grep -c stays inside the argument list: it returns 1 for a count of
       # zero, which must not abort the script under `set -e`.
       printf '[rssg] %s %s items\n' \
         "${index_file#"$PWD"/}" \
         "$(printf '%s\n' "$rss" | grep -c '<item>')" >&2
       printf '%s\n' "$rss"
     }
     52 
     53 
     # usage
     #
     # Print the command synopsis on stderr and terminate with status 1.
     usage() {
       printf '%s\n' "usage: ${0##*/} index.{html,md} title > rss.xml" 1>&2
       exit 1
     }
     58 
     59 
     # no_file <path>
     #
     # Report on stderr that <path> does not exist and terminate with status 2.
     no_file() {
       printf '%s\n' "${0##*/}: $1: No such file" 1>&2
       exit 2
     }
     64 
     65 
     # md_to_html <file.md>
     #
     # Convert the Markdown file $1 to HTML on stdout using lowdown(1).
     # Terminates with status 3, after a diagnostic on stderr, when lowdown is
     # not available.
     md_to_html() {
       # `command -v` is the POSIX way to probe for a command; which(1) is an
       # optional external tool whose output and exit status vary.
       if ! command -v lowdown >/dev/null 2>&1; then
         printf '%s\n' "${0##*/}: lowdown: command not found" >&2
         exit 3
       fi

       lowdown \
         -D html-skiphtml \
         -D smarty \
         -d metadata \
         -d autolink "$1"
     }
     74 
     75 
     # get_title
     #
     # Filter: print the text of the first line on stdin that starts with an
     # <h1 tag (any letter case), with every tag removed, then stop reading.
     get_title() {
       awk '
         tolower($0) ~ /^<h1/ {
           gsub(/<[^>]*>/, "")
           print
           exit
         }
       '
     }
     79 
     80 
     # get_url
     #
     # Filter: print the href of the first anchor on stdin that links to an
     # rss.xml file. This is the public URL of the feed itself.
     get_url() {
       # The capture is limited to [^"]* so that it stops at the closing quote
       # of the href value; a greedy .* would run on to the last quote of the
       # line and swallow any attributes that follow the href.
       grep -i '<a .*rss\.xml"' |
         head -n 1 |
         sed 's#.*href="\([^"]*rss\.xml\)".*#\1#'
     }
     85 
     86 
     # get_items
     #
     # Filter: for every line on stdin holding an anchor of the form
     #   <a href="URL" title="DATE" ...>TEXT</a>
     # print "URL DATE TEXT". Lines without such an anchor are dropped.
     get_items() {
       # Both attribute values are captured with [^"]* so they end at their own
       # closing quote, and [^>]* skips any attributes that follow title.
       grep -i 'href="[^"]*" title="' |
         sed 's#.*href="\([^"]*\)" title="\([^"]*\)"[^>]*>\(.*\)</a>.*#\1 \2 \3#'
     }
     91 
     92 
     # get_description
     #
     # Filter: print the content of the first <p> element found on stdin.
     # Output starts after the opening <p ...> tag and ends before the matching
     # </p>; reading stops as soon as the closing tag has been seen.
     get_description() {
       # s and t are regex fragments (whitespace class, tag name) spliced into
       # the dynamic patterns below. The range pattern prints every line from
       # the opening tag to the closing tag; each sub() strips its tag and
       # whatever lies outside the paragraph. x flags that the end was reached.
       awk -v 's=[[:space:]]' -v 't=[Pp]' '
         sub("^.*<"s"*"t"("s"[^>]*)?>", ""), sub("</"s"*"t""s"*>.*", "") && x = 1
         x { exit }
       '
     }
     98 
     # remove_tags
     #
     # Filter: delete every <...> sequence (opening and closing tags alike)
     # from stdin.
     remove_tags() {
       # One expression is enough: <[^>]*> already covers closing tags.
       sed -e 's#<[^>]*>##g'
     }
    102 
    103 
    # remove_nbsp
    #
    # Filter: replace every &nbsp; entity on stdin with a plain space.
    remove_nbsp() {
      sed -e 's/&nbsp;/ /g'
    }
    107 
    108 
    # rel_to_abs_urls <site_url> <base_url>
    #
    # Filter: rewrite src= and href= attributes on stdin into absolute URLs.
    #   - values starting with "/" are prefixed with <site_url>
    #   - values containing neither ":" nor "/" are prefixed with <base_url>/
    # All other values are left untouched.
    rel_to_abs_urls() {
      # The URLs end up inside a sed replacement delimited by '#'. There '&'
      # means "the matched text", '\' starts an escape and '#' ends the
      # replacement, so each of them is backslash-escaped first.
      _site_esc=$(printf '%s\n' "$1" | sed 's/[\\&#]/\\&/g')
      _base_esc=$(printf '%s\n' "$2" | sed 's/[\\&#]/\\&/g')

      # Root-relative links are rewritten first; the result contains ':' and
      # '/', so the second expression cannot touch it again.
      sed -E \
        -e 's#(src|href)="/([^"]*)"#\1="'"$_site_esc"'/\2"#g' \
        -e 's#(src|href)="([^:/"]*)"#\1="'"$_base_esc"'/\2"#g'
    }
    117 
    118 
    # date_rfc_822 <date>
    #
    # Print the date described by $1 in the format produced by `date -R`
    # (e.g. "Thu, 02 Jan 2020 00:00:00 +0000"), as used in RSS date fields.
    date_rfc_822() {
      date -R -d "$1"
    }
    122 
    123 
    # render_items <base_dir> <base_url>
    #
    # Read item descriptions ("URL DATE TEXT", one per line) from stdin and
    # render each of them with render_item, passing $1 and $2 through.
    render_items() {
      # `|| [ -n "$i" ]` keeps a final line that lacks a trailing newline.
      while read -r i || [ -n "$i" ]; do
        # An empty item list still arrives as one blank line; rendering it
        # would produce a bogus <item>, so blank lines are skipped.
        [ -n "$i" ] || continue
        render_item "$1" "$2" "$i"
      done
    }
    129 
    130 
    # render_item <base_dir> <base_url> <item>
    #
    # Print one RSS <item> element on stdout.
    #   $1  directory that holds the index file; item paths are relative to it
    #   $2  public URL corresponding to that directory
    #   $3  item line "URL DATE TEXT": field 1 is the page path, field 2 the
    #       publication date
    # The page content is taken from <path>.md (converted by md_to_html) when
    # that file exists, otherwise from the .html file itself. When neither
    # exists a warning is written to stderr and nothing is printed.
    render_item() {
      base="$1"
      base_url="$2"
      item="$3"

      # Scheme and host only (fields 1-3 of a '/'-split URL), the same rule
      # render_feed applies; this stays correct however deep base_url is.
      site_url=$(printf '%s\n' "$base_url" | cut -d '/' -f1-3)

      url=$(printf '%s\n' "$item" | awk '{print $1}')
      date=$(printf '%s\n' "$item" | awk '{print $2}')

      f="$base/$url"

      # Reset html for every item: without this a missing file would silently
      # reuse the content of the previous item (or of the index page).
      html=''
      if test -f "${f%.html}.md"; then
        html=$(md_to_html "${f%.html}.md")
      elif test -f "$f"; then
        html=$(cat "$f")
      else
        printf '%s\n' "[rssg] $url: no such page, item skipped" >&2
        return 0
      fi

      description=$(
        printf '%s\n' "$html" |
          rel_to_abs_urls "$site_url" "$base_url" |
          remove_nbsp
      )
      title=$(printf '%s\n' "$description" | get_title)
      guid="$base_url/${url#/}"

      # printf '%s\n' emits the page content verbatim; echo may interpret
      # backslash sequences depending on the shell.
      printf '%s\n' \
        '' \
        '<item>' \
        "<guid>$guid</guid>" \
        "<link>$guid</link>" \
        "<pubDate>$(date_rfc_822 "$date")</pubDate>" \
        "<title>$title</title>" \
        '<description><![CDATA[' \
        '' \
        "$description" \
        '' \
        ']]></description>' \
        '</item>'
    }
    166 
    167 
    # render_feed <feed_url> <title> <description>
    #
    # Wrap the <item> elements read from stdin into a complete RSS 2.0
    # document on stdout.
    #   $1  public URL of the feed; also used for the atom:link self reference
    #   $2  channel title (&nbsp; entities are replaced by spaces)
    #   $3  channel description
    render_feed() {
      url="$1"
      title=$(printf '%s\n' "$2" | remove_nbsp)
      description="$3"

      # Scheme and host of the feed URL: fields 1-3 of the '/'-split URL.
      base_url=$(printf '%s\n' "$url" | cut -d '/' -f1-3)

      # lastBuildDate is the time the feed is generated. No item date is in
      # scope in this function, so the current time is requested explicitly
      # rather than through an unset variable.
      printf '%s\n' \
        '<?xml version="1.0" encoding="UTF-8"?>' \
        '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">' \
        '<channel>' \
        "<atom:link href=\"$url\" rel=\"self\" type=\"application/rss+xml\" />" \
        "<title>$title</title>" \
        "<description>$description</description>" \
        "<link>$base_url/</link>" \
        "<lastBuildDate>$(date_rfc_822 now)</lastBuildDate>" \
        "$(cat)" \
        '</channel></rss>'
    }
    186 
    187 
    188 main "$@"