Commit | Line | Data |
---|---|---|
d3adcff4 | 1 | #!/usr/bin/env perl |
22e69f33 | 2 | # txt2pre --- convert my site's txt files to `pre'-based atom/rss/html |
d3adcff4 AB |
3 | |
4 | # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> | |
7808cfc7 | 5 | # Copyright (c) 2021 bandali <bandali@gnu.org> |
d3adcff4 AB |
6 | # |
7 | # This program is free software: you can redistribute it and/or modify | |
8 | # it under the terms of the GNU Affero General Public License as | |
9 | # published by the Free Software Foundation, either version 3 of the | |
10 | # License, or (at your option) any later version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | # GNU Affero General Public License for more details. | |
16 | # | |
17 | # You should have received a copy of the GNU Affero General Public License | |
18 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
19 | ||
4ce760d0 AB |
20 | # This simple script borrows from a script of the same name from the |
21 | # wonderful public-inbox project, under AGPLv3+, with additions of | |
22 | # my own. | |
d3adcff4 | 23 | |
7808cfc7 AB |
24 | # Update (2021-11-01): this script isn't currently used for generating |
25 | # my site's pages anymore; but kept for future reference. | |
26 | ||
d3adcff4 AB |
27 | |
28 | use strict; | |
29 | use warnings 'all'; | |
e02deb23 AB |
30 | use Getopt::Long; |
31 | ||
32469801 AB |
# Command-line options and their defaults.
#   --format=FMT  output flavour ('html', 'atom' and 'rss' are handled below)
#   --lang=CODE   language of the input ('en' and 'fa' are recognised below)
#   --index       flag: the page being generated is the index
#   --header      flag: with --index and a feed format, emit the feed preamble
#   --footer      flag: with --index and a feed format, emit the closing tags
my ($format, $lang)           = ('html', 'en');
my ($index, $header, $footer) = ('', '', '');

unless (
    GetOptions(
        'format=s' => \$format,
        'lang=s'   => \$lang,
        'index'    => \$index,
        'header'   => \$header,
        'footer'   => \$footer,
    )
) {
    die("bad command line arguments\n");
}
d3adcff4 | 44 | |
32469801 AB |
# Per-language site metadata.  Every value stays the empty string when
# --lang is neither 'en' nor 'fa'.
my ($author, $site_title, $site_desc, $site_url, $feed_id) = ('') x 5;

if ($lang eq 'en') {
    $author     = 'bandali';
    $site_title = "${author}'s personal site";
    $site_desc  = "notes and blog posts by $author";
    $site_url   = 'https://bndl.org';
    # the feed id embeds the output format (e.g. notes.atom / notes.rss)
    $feed_id    = "tag:bndl.org,2020:notes.$format";
}
elsif ($lang eq 'fa') {
    $author     = 'بندعلی';
    $site_title = "سایت شخصی $author";
    $site_desc  = "نوشتهها و بلاگ پستهای $author";
    $site_url   = 'https://bndl.org/fa/';
    $feed_id    = "tag:bndl.org,2020:fa/notes.$format";
}
65 | ||
d3adcff4 AB |
# Matches a URL in running text.
#   $1: optional opening character directly before the URL: ( ' or !
#   $2: the URL itself: scheme, host part, then an optional path with
#       optional query string and fragment
# Trailing punctuation is matched as part of $2; linkify() is what splits
# it off again.
my $link_re =
    qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
        [\@:\w\.-]+(?:/
        (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
        (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
        (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
        )?
    )}xi;

# For each opening character captured in $1 of $link_re: a pattern that
# matches its closing counterpart (plus one optional . , ; or +) at the
# very end of the URL.
my %pairs;
$pairs{'('} = qr/(\)[\.,;\+]?)\z/;    # Markdown (,), Ruby (+) (, for arrays)
$pairs{"'"} = qr/('[\.,;\+]?)\z/;     # Perl / Ruby
$pairs{'!'} = qr/(![\.,;\+]?)\z/;     # Perl / Ruby
80 | ||
# Characters that must not appear literally in HTML/XML text, mapped to
# the entity that replaces them.
my %html_map = (
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    # quote characters are deliberately not escaped (kept disabled):
    # '"' => '&quot;',
    # "'" => '&#39;',
);

# html_esc($s): return a copy of $s with every '&', '<' and '>' replaced
# by its entity from %html_map.  An undefined argument yields the empty
# string instead of an "uninitialized value" warning.
sub html_esc {
    my ($s) = @_;
    return '' unless defined $s;
    $s =~ s/([&<>])/$html_map{$1}/g;
    return $s;
}
94 | ||
# linkify($s): return a copy of $s in which every URL matched by $link_re
# is wrapped in an <a href="..."> element.  Punctuation that merely ends
# the surrounding sentence, or closes a bracket/quote opened right before
# the URL, is kept outside the anchor.
sub linkify {
    my ($s) = @_;

    # Build the replacement text for one match; $lead is the optional
    # opening character ($1 of $link_re), $href the raw URL ($2).
    my $render = sub {
        my ($lead, $href) = @_;
        $lead = '' unless $lead;
        my $tail = '';

        # it's fairly common to end URLs in messages with
        # '.', ',' or ';' to denote the end of a statement;
        # assume the intent was to end the statement/sentence
        # in English
        my $closer = $pairs{$lead};
        if (defined $closer) {
            # the URL was opened by ( ' or !: strip the matching closer
            $tail = $1 if $href =~ s/$closer//;
        }
        elsif ($href =~ s/(\))?([\.,;])\z//) {
            my ($paren, $punct) = ($1, $2);
            $tail = $punct;
            # require ')' to be paired with '('
            if (defined $paren) {
                if (index($href, '(') < 0) {
                    $tail = ")$tail";    # unpaired: belongs to the text
                }
                else {
                    $href .= ')';        # paired: belongs to the URL
                }
            }
        }
        elsif ($href !~ /\(/ && $href =~ s/\)\z//) {
            $tail = ')';
        }

        return $lead . "<a href=\"$href\">$href</a>" . $tail;
    };

    $s =~ s{$link_re}{ $render->($1, $2) }geo;
    return $s;
}
128 | ||
129 | ||
32469801 AB |
my $out = '';

# atom/rss feed header and footer
#
# With --index and a feed format nothing is read from STDIN: the script
# emits either the feed preamble (--header) or the closing tags (--footer;
# --header wins when both are given) and jumps to the PRINT label.
if ($index and ($format eq 'atom' or $format eq 'rss')) {
    if ($header) {
        # current time from date(1); tr(1) strips the trailing newline
        my $now_iso8601 = `date -Iseconds -u | tr -d \\\\n`;
        my $now_rfc5322 = `date -uR | tr -d \\\\n`;
        # the link pointing at the feed being generated is "self"
        my $atom_rel = $format eq 'atom' ? 'self' : 'alternate';
        my $rss_rel = $format eq 'rss' ? 'self' : 'alternate';
        # in an RSS document the Atom link element needs the atom: prefix
        my $link = $format eq 'atom' ? 'link' : 'atom:link';
        my $links = '';
        if ($lang eq 'en') {
            $links = qq(
<$link hreflang="fa" href="https://bndl.org/fa/notes.atom" rel="alternate" type="application/atom+xml" />
<$link hreflang="fa" href="https://bndl.org/fa/notes.rss" rel="alternate" type="application/rss+xml" />
<$link hreflang="fa" href="https://bndl.org/fa/bandali.fa.txt" rel="alternate" type="text/plain" />
<$link hreflang="fa" href="https://bndl.org/fa/" rel="alternate" type="text/html" />
<$link href="https://bndl.org/notes.atom" rel="$atom_rel" type="application/atom+xml" />
<$link href="https://bndl.org/notes.rss" rel="$rss_rel" type="application/rss+xml" />
<$link href="https://bndl.org/bandali.txt" rel="alternate" type="text/plain" />
<$link href="https://bndl.org" rel="alternate" type="text/html" />);
        } elsif ($lang eq 'fa') {
            # use $link here as well, so that the RSS flavour gets
            # <atom:link .../> elements just like the 'en' branch
            $links = qq(
<$link hreflang="en" href="https://bndl.org/notes.atom" rel="alternate" type="application/atom+xml" />
<$link hreflang="en" href="https://bndl.org/notes.rss" rel="alternate" type="application/rss+xml" />
<$link hreflang="en" href="https://bndl.org/bandali.txt" rel="alternate" type="text/plain" />
<$link hreflang="en" href="https://bndl.org" rel="alternate" type="text/html" />
<$link href="https://bndl.org/fa/notes.atom" rel="$atom_rel" type="application/atom+xml" />
<$link href="https://bndl.org/fa/notes.rss" rel="$rss_rel" type="application/rss+xml" />
<$link href="https://bndl.org/fa/bandali.fa.txt" rel="alternate" type="text/plain" />
<$link href="https://bndl.org/fa/" rel="alternate" type="text/html" />);
        }
        # drop the newline that directly follows the opening qq(
        $links =~ s/^\n//;

        $out .= '<?xml version="1.0" encoding="UTF-8" ?>';
        $out .= ($format eq 'atom') ? qq(
<feed xml:lang="$lang" xmlns="http://www.w3.org/2005/Atom">
<title>$site_title</title>
<subtitle>$site_desc</subtitle>
<id>$feed_id</id>
$links
<updated>$now_iso8601</updated>)
            : ($format eq 'rss') ? qq(
<rss version="2.0"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title>$site_title</title>
<description>$site_desc</description>
<link>$site_url</link>
<language>$lang</language>
<lastBuildDate>$now_rfc5322</lastBuildDate>
<pubDate>$now_rfc5322</pubDate>
<ttl>1800</ttl>
$links)
            : '';
    } elsif ($footer) {
        $out .= ($format eq 'atom') ? '</feed>'
            : ($format eq 'rss') ? '</channel></rss>'
            : '';
    }

    # we're done
    goto PRINT;
}
195 | ||
196 | ||
# date_cmd(@args): run date(1) with @args and return what it printed, with
# all newlines removed.  The command is started without a shell, so text
# taken from the input file cannot be interpreted as shell syntax.
# Returns the empty string when date cannot be started or prints nothing.
sub date_cmd {
    my (@args) = @_;
    my $pid = open(my $fh, '-|', 'date', @args);
    unless ($pid) {
        warn "cannot run date: $!\n";
        return '';
    }
    my $stamp = do { local $/; <$fh> };
    close($fh);    # date reports a failed conversion on stderr by itself
    $stamp = '' unless defined $stamp;
    $stamp =~ tr/\n//d;
    return $stamp;
}

# Slurp the whole note from standard input.
my $txt = do { local $/; <STDIN> };
$txt = '' unless defined $txt;

# The title is the first line of the note, escaped and trimmed, with the
# author's name appended unless the line already mentions it.
my ($first_line) = $txt =~ /\A([^\n]+)/;
my $title = html_esc(defined $first_line ? $first_line : '');
$title =~ s/^\s+|\s+$//g;
# index() rather than /$author/: an empty $author would turn the match
# into the empty pattern, which re-runs the last successful regex.
$title .= " — $author" if index($title, $author) < 0;

# The last three lines of the note are expected to carry the update date,
# the publication date and the plain-text URL, in that order; any line
# that does not have the expected label leaves its variable undefined.
my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/;
($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd;
($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub;
$upd = $pub if (!$upd);
($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url;
$url = 'https://bndl.org/bandali-cv.txt'
    if (!$url and $title =~ /curriculum vitae/);
$url = html_esc($url) if $url;

$txt = linkify(html_esc($txt));


# Declared separately from the conditional assignments: the behaviour of
# "my $x = ... if COND;" is undefined according to perlsyn.
my ($upd_iso8601, $pub_iso8601, $pub_rfc5322);
$upd_iso8601 = date_cmd('-Iseconds', '-u', '-d', $upd) if $upd;
if ($pub) {
    $pub_iso8601 = date_cmd('-Iseconds', '-u', '-d', $pub);
    $pub_rfc5322 = date_cmd('-u', '-R', '-d', $pub);
}

# foo.txt / foo.LANG.txt -> foo.html, dropping a leading "bandali-" from
# the file name; the slug is the file name without its .html suffix.
my ($url_html, $slug, $note_id);
if ($url) {
    $url_html = $url =~ s/(?:[.]\Q$lang\E)?[.]txt$/.html/r;
    $url_html =~ s|/bandali-(.*)|/$1| if $url_html;
}
if ($url_html) {
    $slug    = $url_html =~ s|.*/(.*)[.]html$|$1|r;
    $note_id = "$feed_id:$slug";
}
32469801 AB |
222 | |
# note header
#
# Appends to $out everything that precedes the note text: the HTML
# document head for --format=html, or the opening part of a feed entry
# (up to and including the CDATA-wrapped <pre>) for atom/rss.
if ($format eq 'html') {
    $out .=
        '<!doctype html>'
        # the closing '>' must follow the ternary, not sit inside its else
        # arm, otherwise it is lost whenever dir="rtl" is emitted
        . qq(<html lang="$lang") . ($lang eq 'fa'
                                    ? ' dir="rtl"'
                                    : '') . '>'
        . qq(<head>
<meta http-equiv="Content-Type"
content="text/html; charset=utf-8" />\n)
        . "<title>$title</title>\n"
        . qq(<link rel="icon" href="data:,">\n)
        . ($url
           ? qq(<link rel="alternate" href="$url"
title="plain text" type="text/plain" />\n)
           : '')
        # the index page also links to its counterpart in the other language
        . (($index and $lang eq 'en')
           ? qq(<link rel="alternate" href="https://bndl.org/fa/"
hreflang="fa" title="persian" />\n)
           : ($index and $lang eq 'fa')
           ? qq(<link rel="alternate" href="https://bndl.org/"
hreflang="en" title="english" />\n)
           : '')
        . qq(<style>\@media(prefers-color-scheme:dark){
body{background:#1c1c1c;color:white;}a:link{color:#acdeff;}
a:visited{color:#f8f;}a:active{color:#e00;}})
        . ($lang eq 'fa'
           ? qq(\n\@font-face{font-family:sahel;font-weight:normal;
src:local('Sahel WOL'),local('Sahel'),
url('sahel.woff2')format('woff2');}pre{font-family:sahel})
           : '')
        . "</style>\n"
        . '</head><body><pre>';
} elsif ($format eq 'atom' or $format eq 'rss') {
    # inside RSS the Atom <updated> element carries the atom: prefix
    my $atom_updated =
        ($format eq 'atom') ? 'updated'
        : ($format eq 'rss') ? 'atom:updated'
        : '';
    # plain ternary instead of "my ... if $upd", whose behaviour perlsyn
    # documents as undefined
    my $updated = $upd
        ? "<$atom_updated>$upd_iso8601</$atom_updated>\n"
        : '';
    $out .= ($format eq 'atom') ? qq(
<entry xml:base="$site_url">
<author><name>$author</name></author>
<id>$note_id</id>
<published>$pub_iso8601</published>\n)
        . ($updated ? $updated : '') .
        qq(<link href="$url" rel="alternate" type="text/plain" />
<link href="$url_html" rel="alternate" type="text/html" />
<title>$title</title>
<content type="html"><![CDATA[<pre>)
        : ($format eq 'rss') ? qq(
<item>
<title>$title</title>
<link>$url_html</link>
<guid isPermaLink="false">$note_id</guid>
<pubDate>$pub_rfc5322</pubDate>\n)
        # RSS only mentions the update date when it differs from pubDate
        . (($updated and $pub ne $upd) ? $updated : '') .
        qq(<content:encoded><![CDATA[<pre>)
        : '';
}
# note body
$out .= $txt;

# note footer: the closing markup for each output format; any other
# format gets no footer at all
my %closing_for = (
    html => '</pre></body></html>',
    atom => '</pre>]]></content></entry>',
    rss  => '</pre>]]></content:encoded></item>',
);
$out .= $closing_for{$format} if exists $closing_for{$format};

# Target of the "goto PRINT" used by the feed header/footer mode: write
# the accumulated output followed by a newline, then flush STDOUT.
PRINT:
print($out . "\n");
STDOUT->flush;