From 97130f95f04bd5cf8363b35994e3c44f11d70f0c Mon Sep 17 00:00:00 2001
From: Matthias Andreas Benkard
Date: Mon, 27 Jan 2020 21:03:39 +0100
Subject: Wiki: Render WikiWord links and autolinks on the server side.

Change-Id: I46f972bcebf765a3d9fb55b7b35f40deb978dc5d
---
 .../mulk/mulkcms2/benki/wiki/WikiPageRevision.java | 77 +++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

(limited to 'src/main/java')

diff --git a/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java b/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java
index 5783166..4054312 100644
--- a/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java
+++ b/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java
@@ -3,6 +3,9 @@ package eu.mulk.mulkcms2.benki.wiki;
 import eu.mulk.mulkcms2.benki.users.User;
 import io.quarkus.hibernate.orm.panache.PanacheEntityBase;
 import java.time.OffsetDateTime;
+import java.util.function.Function;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 import javax.persistence.Column;
 import javax.persistence.Entity;
 import javax.persistence.FetchType;
@@ -12,6 +15,11 @@ import javax.persistence.Id;
 import javax.persistence.JoinColumn;
 import javax.persistence.ManyToOne;
 import javax.persistence.Table;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.TextNode;
+import org.jsoup.parser.Tag;
 
 @Entity
 @Table(name = "wiki_page_revisions", schema = "benki")
@@ -53,9 +61,76 @@ public class WikiPageRevision extends PanacheEntityBase {
       User author) {
     this.date = date;
     this.title = title;
-    this.content = content;
+    this.content = unhrefify(unwikilinkify(Jsoup.parse(content))).select("body").html();
     this.format = format;
     this.page = page;
     this.author = author;
   }
+
+  public String enrichedContent() {
+    return wikilinkify(hrefify(Jsoup.parse(content))).select("body").html();
+  }
+
+  private static Document tagsoupMapText(Document soup, Function fn) {
+    for (var subnode :
+        soup.select(":not(a):not(a *)").stream()
+            .flatMap(node -> node.childNodes().stream())
+            .collect(Collectors.toUnmodifiableList())) {
+      if (subnode instanceof TextNode) {
+        var newNode = new Element(Tag.valueOf("span"), "");
+        newNode.html(fn.apply(((TextNode) subnode).text()));
+        subnode.replaceWith(newNode);
+        newNode.unwrap();
+      }
+    }
+    return soup;
+  }
+
+  private static Pattern WIKIWORD_REGEX =
+      Pattern.compile(
+          "\\p{javaUpperCase}+\\p{javaLowerCase}+\\p{javaUpperCase}+\\p{javaLowerCase}+\\w+");
+  private static Pattern URL_REGEX =
+      Pattern.compile("\\(?\\bhttps?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]");
+
+  private static Document hrefify(Document soup) {
+    return tagsoupMapText(
+        soup,
+        x ->
+            URL_REGEX
+                .matcher(x)
+                .replaceAll(
+                    match -> {
+                      var s = match.group();
+                      var leftParen = s.startsWith("(");
+                      var rightParen = s.endsWith(")");
+                      var url =
+                          s.substring(leftParen ? 1 : 0, rightParen ? s.length() - 1 : s.length());
+                      return String.format(
+                          "%s%s%s",
+                          leftParen ? "(" : "", url, url, rightParen ? ")" : "");
+                    }));
+  }
+
+  private static Document unhrefify(Document soup) {
+    soup.select(".benkiautohref").unwrap();
+    return soup;
+  }
+
+  private static Document wikilinkify(Document soup) {
+    return tagsoupMapText(
+        soup,
+        x ->
+            WIKIWORD_REGEX
+                .matcher(x)
+                .replaceAll(
+                    match ->
+                        String.format(
+                            "%s",
+                            match.group(), match.group())));
+  }
+
+  private static Document unwikilinkify(Document soup) {
+    soup.select(".benkilink").unwrap();
+    return soup;
+  }
 }
-- 
cgit v1.2.3