2 # txt2pre --- convert my site's txt files to `pre'-based atom/rss/html
4 # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
5 # Copyright (c) 2021 bandali <bandali@gnu.org>
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as
9 # published by the Free Software Foundation, either version 3 of the
10 # License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <https://www.gnu.org/licenses/>.
20 # This simple script borrows from a script of the same name from the
21 # wonderful public-inbox project, under AGPLv3+, with additions of
24 # Update (2021-11-01): this script is no longer used for generating
25 # my site's pages; it is kept for future reference.
38 GetOptions
('format=s' => \
$format,
43 or die("bad command line arguments\n");
46 $lang eq 'en' ?
'bandali'
47 : $lang eq 'fa' ?
'بندعلی'
50 $lang eq 'en' ?
"${author}'s personal site"
51 : $lang eq 'fa' ?
"سایت شخصی $author"
54 $lang eq 'en' ?
"notes and blog posts by $author"
55 : $lang eq 'fa' ?
"نوشتهها و بلاگ پستهای $author"
58 ($lang eq 'en') ?
'https://bndl.org'
59 : ($lang eq 'fa') ?
'https://bndl.org/fa/'
62 ($lang eq 'en') ?
"tag:bndl.org,2020:notes.$format"
63 : ($lang eq 'fa') ?
"tag:bndl.org,2020:fa/notes.$format"
67 qr{([\
('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
69 (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
70 (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
71 (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
76 "(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
77 "'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby
78 "!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby
91 $s =~ s/([&<>])/$html_map{$1}/sge;
102 # it's fairly common to end URLs in messages with
103 # '.', ',' or ';' to denote the end of a statement;
104 # assume the intent was to end the statement/sentence
106 if (defined(my $re = $pairs{$beg})) {
107 if ($url =~ s/$re//) {
110 } elsif ($url =~ s/(\))?([\.,;])\z//) {
112 # require ')' to be paired with '('
113 if (defined $1) { # ')'
114 if (index($url, '(') < 0) {
120 } elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
124 $beg . "<a href
=\"$url\">$url</a
>" . $end;
132 # atom/rss feed header and footer
133 if ($index and ($format eq 'atom' or $format eq 'rss')) {
# Current time, captured once for the feed header by shelling out to
# date(1): -u selects UTC, -Iseconds prints ISO 8601, -R prints the
# RFC 5322 form.  tr(1) strips the newline from the command output.
# NOTE(review): the two values come from two separate date(1) runs, so
# they may differ by a second -- confirm this is acceptable.
135 my $now_iso8601 = `date -Iseconds -u | tr -d \\\\n`;
136 my $now_rfc5322 = `date -uR | tr -d \\\\n`;
# The link to the format being generated is marked rel="self"; the link
# to the other feed format is marked rel="alternate".
137 my $atom_rel = $format eq 'atom' ? 'self' : 'alternate';
138 my $rss_rel = $format eq 'rss' ? 'self' : 'alternate';
# Element name used for the feed links: plain `link' when generating
# atom, `atom:link' otherwise.
139 my $link = $format eq 'atom' ? 'link' : 'atom:link';
143 <$link hreflang="fa
" href="https
://bndl
.org
/fa/notes
.atom
" rel="alternate
" type="application
/atom+xml" />
144 <$link hreflang
="fa" href
="https://bndl.org/fa/notes.rss" rel
="alternate" type
="application/rss+xml" />
145 <$link hreflang
="fa" href
="https://bndl.org/fa/bandali.fa.txt" rel
="alternate" type
="text/plain" />
146 <$link hreflang
="fa" href
="https://bndl.org/fa/" rel
="alternate" type
="text/html" />
147 <$link href
="https://bndl.org/notes.atom" rel
="$atom_rel" type
="application/atom+xml" />
148 <$link href
="https://bndl.org/notes.rss" rel
="$rss_rel" type
="application/rss+xml" />
149 <$link href
="https://bndl.org/bandali.txt" rel
="alternate" type
="text/plain" />
150 <$link href
="https://bndl.org" rel
="alternate" type
="text/html" />);
151 } elsif ($lang eq 'fa') {
153 <link hreflang
="en" href
="https://bndl.org/notes.atom" rel
="alternate" type
="application/atom+xml" />
154 <link hreflang
="en" href
="https://bndl.org/notes.rss" rel
="alternate" type
="application/rss+xml" />
155 <link hreflang
="en" href
="https://bndl.org/bandali.txt" rel
="alternate" type
="text/plain" />
156 <link hreflang
="en" href
="https://bndl.org" rel
="alternate" type
="text/html" />
157 <link href
="https://bndl.org/fa/notes.atom" rel
="$atom_rel" type
="application/atom+xml" />
158 <link href
="https://bndl.org/fa/notes.rss" rel
="$rss_rel" type
="application/rss+xml" />
159 <link href
="https://bndl.org/fa/bandali.fa.txt" rel
="alternate" type
="text/plain" />
160 <link href
="https://bndl.org/fa/" rel
="alternate" type
="text/html" />);
164 $out .= '<?xml version="1.0" encoding="UTF-8" ?>';
165 $out .= ($format eq 'atom') ?
qq(
166 <feed xml
:lang
="$lang" xmlns
="http://www.w3.org/2005/Atom">
167 <title
>$site_title</title
>
168 <subtitle
>$site_desc</subtitle
>
171 <updated
>$now_iso8601</updated
>)
172 : ($format eq 'rss') ?
qq(
174 xmlns
:atom
="http://www.w3.org/2005/Atom"
175 xmlns
:content
="http://purl.org/rss/1.0/modules/content/">
177 <title
>$site_title</title
>
178 <description
>$site_desc</description
>
179 <link>$site_url</link>
180 <language
>$lang</language
>
181 <lastBuildDate
>$now_rfc5322</lastBuildDate
>
182 <pubDate
>$now_rfc5322</pubDate
>
187 $out .= ($format eq 'atom') ?
'</feed>'
188 : ($format eq 'rss') ?
'</channel></rss>'
197 my $txt = do { local $/; <STDIN
> };
199 my $title = html_esc
($txt =~ /\A([^\n]+)/);
200 $title =~ s/^\s+|\s+$//g;
201 $title .= " — $author" if $title !~ /$author/;
# Pull the trailer out of the note: the pattern is anchored at the end
# of $txt (\z) and `.' does not cross newlines, so the three captures
# are the last three lines of the text (LF or CRLF terminated), taken
# in order as the "updated", "published" and "plain text" lines.
203 my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/;
# Keep only the value after the English or Persian label.  When a line
# does not carry the expected label the match fails and the variable
# becomes undef.
204 ($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd;
205 ($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub;
# No usable "updated" value: fall back to the publication date.
206 $upd = $pub if (!$upd);
207 ($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url;
# Special case: a page with no plain-text link whose title contains
# "curriculum vitae" gets a hard-coded plain-text URL.
208 $url = 'https://bndl.org/bandali-cv.txt'
209 if (!$url and $title =~ /curriculum vitae/);
210 $url = html_esc
($url) if $url;
212 $txt = linkify
(html_esc
($txt));
# Derive the per-note values used by the HTML/Atom/RSS output from the
# parsed trailer fields: ISO 8601 and RFC 5322 timestamps, the HTML URL,
# the slug and the per-note feed id.
#
# Reads (set earlier in the script): $upd, $pub, $url, $lang, $feed_id.
# Each result stays undef when the field it depends on is empty.
#
# The variables are declared unconditionally and assigned afterwards,
# because perlsyn documents `my $x = ... if COND;' as undefined behaviour.
my ($upd_iso8601, $pub_iso8601, $pub_rfc5322);
my ($url_html, $slug, $note_id);

# Run date(1) with the given arguments and return its output with all
# newlines removed ('' if the command cannot be started or prints
# nothing).  The list form of open() does not go through a shell, so
# quotes or shell metacharacters in a date string taken from the input
# text cannot alter the command line.
my $run_date = sub {
    my @args = @_;
    open(my $pipe, '-|', 'date', @args) or return '';
    my $stamp = do { local $/; <$pipe> };
    close($pipe);
    $stamp = '' unless defined $stamp;
    $stamp =~ tr/\n//d;
    return $stamp;
};

if ($upd) {
    $upd_iso8601 = $run_date->('-Iseconds', '-ud', $upd);
}
if ($pub) {
    $pub_iso8601 = $run_date->('-Iseconds', '-ud', $pub);
    $pub_rfc5322 = $run_date->('-uRd', $pub);
}
if ($url) {
    # foo.txt or foo.<lang>.txt -> foo.html
    $url_html = $url =~ s/(?:[.]$lang)?[.]txt$/.html/r;
}
if ($url_html) {
    # strip a "bandali-" prefix that directly follows a slash
    $url_html =~ s|/bandali-(.*)|/$1|;
    # slug: the file name without its directory part and ".html"
    $slug = $url_html =~ s|.*/(.*)[.]html$|$1|r;
    $note_id = "$feed_id:$slug";
}
224 if ($format eq 'html') {
227 . qq(<html lang
="$lang") . ($lang eq 'fa'
231 <meta http
-equiv
="Content-Type"
232 content
="text/html; charset=utf-8" />\n)
233 . "<title>$title</title>\n"
234 . qq(<link rel
="icon" href
="data:,">\n)
236 ?
qq(<link rel
="alternate" href
="$url"
237 title
="plain text" type
="text/plain" />\n)
239 . (($index and $lang eq 'en')
240 ?
qq(<link rel
="alternate" href
="https://bndl.org/fa/"
241 hreflang
="fa" title
="persian" />\n)
242 : ($index and $lang eq 'fa')
243 ?
qq(<link rel
="alternate" href
="https://bndl.org/"
244 hreflang
="en" title
="english" />\n)
246 . qq(<style
>\
@media(prefers
-color
-scheme
:dark
){
247 body
{background
:#1c1c1c;color:white;}a:link{color:#acdeff;}
248 a
:visited
{color
:#f8f;}a:active{color:#e00;}})
250 ?
qq(\n\
@font-face
{font
-family
:sahel
;font
-weight
:normal
;
251 src
:local('Sahel WOL'),local('Sahel'),
252 url
('sahel.woff2')format
('woff2');}pre
{font
-family
:sahel
})
255 . '</head><body><pre>';
256 } elsif ($format eq 'atom' or $format eq 'rss') {
258 ($format eq 'atom') ?
'updated'
259 : ($format eq 'rss') ?
'atom:updated'
262 "<$atom_updated>$upd_iso8601</$atom_updated>\n" if $upd;
263 $out .= ($format eq 'atom') ?
qq(
264 <entry xml
:base
="$site_url">
265 <author
><name
>$author</name></author
>
267 <published
>$pub_iso8601</published
>\n)
268 . ($updated ?
$updated : '') .
269 qq(<link href
="$url" rel
="alternate" type
="text/plain" />
270 <link href
="$url_html" rel
="alternate" type
="text/html" />
271 <title
>$title</title
>
272 <content type
="html"><![CDATA
[<pre
>)
273 : ($format eq 'rss') ?
qq(
275 <title
>$title</title
>
276 <link>$url_html</link>
277 <guid isPermaLink
="false">$note_id</guid
>
278 <pubDate
>$pub_rfc5322</pubDate
>\n)
279 . (($updated and $pub ne $upd) ?
$updated : '') .
280 qq(<content
:encoded
><![CDATA
[<pre
>)
286 if ($format eq 'html') {
287 $out .= '</pre></body></html>';
288 } elsif ($format eq 'atom') {
289 $out .= "</pre>]]></content></entry>";
290 } elsif ($format eq 'rss') {
291 $out .= "</pre>]]></content:encoded></item>";