From: Amin Bandali Date: Sun, 10 Oct 2021 15:12:14 +0000 (-0400) Subject: txt2html: rename to txt2pre X-Git-Url: https://git.shemshak.org/gitweb.cgi/~bandali/bndl.org/commitdiff_plain/4ce760d0e6a8e2a939e45afce78045bf6ead9828?ds=inline;hp=212ec2e606552fdfa9fcd970a5e27a7dc9ce0042 txt2html: rename to txt2pre i plan on adding support for other output markups (e.g. atom and rss) --- diff --git a/GNUmakefile b/GNUmakefile index d791a55..ae9eab7 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -8,7 +8,7 @@ # without any warranty. -gen-html = perl txt2html $(1) < $< > $@ +gen-html = perl txt2pre $(1) < $< > $@ port := $(if $(port),$(port),8000) TXT := $(filter-out bandali-pubkey.txt bandali.txt,$(wildcard *.txt)) @@ -39,6 +39,6 @@ serve: watch: while true; do \ echo $(TXT) $(TXT_FA) bandali.txt fa/bandali.fa.txt \ - GNUmakefile txt2html | tr " " "\n" | entr -d make; done + GNUmakefile txt2pre | tr " " "\n" | entr -d make; done .PHONY: all clean serve watch diff --git a/txt2html b/txt2html deleted file mode 100644 index e6dc30b..0000000 --- a/txt2html +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env perl -# txt2html --- simple script to convert my site's txt files to html - -# Copyright (C) 2014-2021 all contributors -# Copyright (c) 2021 Amin Bandali -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>.
- -# This simple script borrows from the wonderful `txt2pre' from -# public-inbox.git, under AGPLv3+, with a few additions of my own. - - -use strict; -use warnings 'all'; -use Getopt::Long; - -my $opt_lang = 'en'; -my $opt_index; -GetOptions ('lang=s' => \$opt_lang, - 'index' => \$opt_index) - or die("bad command line arguments\n"); - -my $link_re = - qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher):// - [\@:\w\.-]+(?:/ - (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*) - (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)? - (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)? - )? - )}xi; - -my %pairs = ( - "(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays) - "'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby - "!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby -); - -my %html_map = ( - '&' => '&', - '<' => '<', - '>' => '>', - # '"' => '"', - # "'" => ''', -); - -sub html_esc { - my ($s) = @_; - $s =~ s/([&<>])/$html_map{$1}/sge; - $s; -} - -sub linkify { - my ($s) = @_; - $s =~ s^$link_re^ - my $beg = $1 || ''; - my $url = $2; - my $end = ''; - - # it's fairly common to end URLs in messages with - # '.', ',' or ';' to denote the end of a statement; - # assume the intent was to end the statement/sentence - # in English - if (defined(my $re = $pairs{$beg})) { - if ($url =~ s/$re//) { - $end = $1; - } - } elsif ($url =~ s/(\))?([\.,;])\z//) { - $end = $2; - # require ')' to be paired with '(' - if (defined $1) { # ')' - if (index($url, '(') < 0) { - $end = ")$end"; - } else { - $url .= ')'; - } - } - } elsif ($url !~ /\(/ && $url =~ s/\)\z//) { - $end = ')'; - } - - $beg . "$url" . 
$end; - ^geo; - $s; -} - - -my $txt = do { local $/; }; - -my $title = html_esc($txt =~ /\A([^\n]+)/); -$title =~ s/^\s+|\s+$//g; -if ($opt_lang eq 'fa') { - $title .= ' — بندعلی' if $title !~ /بندعلی/; -} else { - $title .= ' — bandali' if $title !~ /bandali/; -} - -my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/; -($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd; -($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub; -($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url; -$url = 'https://bndl.org/bandali-cv.txt' - if (!$url and $title =~ /curriculum vitae/); -$url = html_esc($url) if $url; - -$txt = linkify(html_esc($txt)); - -print("", - qq(", - qq( -\n), - "$title\n", - qq(\n), - $url ? qq(\n) : '', - ($opt_index and $opt_lang eq 'en') - ? qq(\n) - : ($opt_index and $opt_lang eq 'fa') - ? qq(\n) - : '', - qq(\n", - "
$txt
\n"); -STDOUT->flush; diff --git a/txt2pre b/txt2pre new file mode 100644 index 0000000..e288a69 --- /dev/null +++ b/txt2pre @@ -0,0 +1,147 @@ +#!/usr/bin/env perl +# txt2pre --- convert my site's txt files to `pre'-based html + +# Copyright (C) 2014-2021 all contributors +# Copyright (c) 2021 Amin Bandali +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# This simple script borrows from a script of the same name from the +# wonderful public-inbox project, under AGPLv3+, with additions of +# my own. + + +use strict; +use warnings 'all'; +use Getopt::Long; + +my $opt_lang = 'en'; +my $opt_index; +GetOptions ('lang=s' => \$opt_lang, + 'index' => \$opt_index) + or die("bad command line arguments\n"); + +my $link_re = + qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher):// + [\@:\w\.-]+(?:/ + (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*) + (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)? + (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)? + )? + )}xi; + +my %pairs = ( + "(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays) + "'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby + "!" 
=> qr/(![\.,;\+]?)\z/, # Perl / Ruby +); + +my %html_map = ( + '&' => '&', + '<' => '<', + '>' => '>', + # '"' => '"', + # "'" => ''', +); + +sub html_esc { + my ($s) = @_; + $s =~ s/([&<>])/$html_map{$1}/sge; + $s; +} + +sub linkify { + my ($s) = @_; + $s =~ s^$link_re^ + my $beg = $1 || ''; + my $url = $2; + my $end = ''; + + # it's fairly common to end URLs in messages with + # '.', ',' or ';' to denote the end of a statement; + # assume the intent was to end the statement/sentence + # in English + if (defined(my $re = $pairs{$beg})) { + if ($url =~ s/$re//) { + $end = $1; + } + } elsif ($url =~ s/(\))?([\.,;])\z//) { + $end = $2; + # require ')' to be paired with '(' + if (defined $1) { # ')' + if (index($url, '(') < 0) { + $end = ")$end"; + } else { + $url .= ')'; + } + } + } elsif ($url !~ /\(/ && $url =~ s/\)\z//) { + $end = ')'; + } + + $beg . "$url" . $end; + ^geo; + $s; +} + + +my $txt = do { local $/; }; + +my $title = html_esc($txt =~ /\A([^\n]+)/); +$title =~ s/^\s+|\s+$//g; +if ($opt_lang eq 'fa') { + $title .= ' — بندعلی' if $title !~ /بندعلی/; +} else { + $title .= ' — bandali' if $title !~ /bandali/; +} + +my ($upd, $pub, $url) = $txt =~ /(.*)\r?\n(.*)\r?\n(.*)\r?\n?\z/; +($upd) = $upd =~ /(?:updated|ویرایش): (.*)/ if $upd; +($pub) = $pub =~ /(?:published|انتشار): (.*)/ if $pub; +($url) = $url =~ /(?:plain text|متن ساده): (.*)/ if $url; +$url = 'https://bndl.org/bandali-cv.txt' + if (!$url and $title =~ /curriculum vitae/); +$url = html_esc($url) if $url; + +$txt = linkify(html_esc($txt)); + +print("", + qq(", + qq( +\n), + "$title\n", + qq(\n), + $url ? qq(\n) : '', + ($opt_index and $opt_lang eq 'en') + ? qq(\n) + : ($opt_index and $opt_lang eq 'fa') + ? qq(\n) + : '', + qq(\n", + "
$txt
\n"); +STDOUT->flush;