diff options
author | Matthias Andreas Benkard <code@mail.matthias.benkard.de> | 2020-01-27 21:03:39 +0100 |
---|---|---|
committer | Matthias Andreas Benkard <code@mail.matthias.benkard.de> | 2020-01-27 21:03:39 +0100 |
commit | 97130f95f04bd5cf8363b35994e3c44f11d70f0c (patch) | |
tree | 97a31afc7213ae64ae1a82b85d09f96865ad4d28 /src/main/java/eu | |
parent | 0c1536469d7e3e286bd72df8e09ac7e4a7a0c4ae (diff) |
Wiki: Render WikiWord links and autolinks on the server side.
Change-Id: I46f972bcebf765a3d9fb55b7b35f40deb978dc5d
Diffstat (limited to 'src/main/java/eu')
-rw-r--r-- | src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java | 77 |
1 files changed, 76 insertions, 1 deletions
diff --git a/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java b/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java index 5783166..4054312 100644 --- a/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java +++ b/src/main/java/eu/mulk/mulkcms2/benki/wiki/WikiPageRevision.java @@ -3,6 +3,9 @@ package eu.mulk.mulkcms2.benki.wiki; import eu.mulk.mulkcms2.benki.users.User; import io.quarkus.hibernate.orm.panache.PanacheEntityBase; import java.time.OffsetDateTime; +import java.util.function.Function; +import java.util.regex.Pattern; +import java.util.stream.Collectors; import javax.persistence.Column; import javax.persistence.Entity; import javax.persistence.FetchType; @@ -12,6 +15,11 @@ import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; import javax.persistence.Table; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.TextNode; +import org.jsoup.parser.Tag; @Entity @Table(name = "wiki_page_revisions", schema = "benki") @@ -53,9 +61,76 @@ public class WikiPageRevision extends PanacheEntityBase { User author) { this.date = date; this.title = title; - this.content = content; + this.content = unhrefify(unwikilinkify(Jsoup.parse(content))).select("body").html(); this.format = format; this.page = page; this.author = author; } + + public String enrichedContent() { + return wikilinkify(hrefify(Jsoup.parse(content))).select("body").html(); + } + + private static Document tagsoupMapText(Document soup, Function<String, String> fn) { + for (var subnode : + soup.select(":not(a):not(a *)").stream() + .flatMap(node -> node.childNodes().stream()) + .collect(Collectors.toUnmodifiableList())) { + if (subnode instanceof TextNode) { + var newNode = new Element(Tag.valueOf("span"), ""); + newNode.html(fn.apply(((TextNode) subnode).text())); + subnode.replaceWith(newNode); + newNode.unwrap(); + } + } + return soup; + } + + private static Pattern WIKIWORD_REGEX = + Pattern.compile( + "\\p{javaUpperCase}+\\p{javaLowerCase}+\\p{javaUpperCase}+\\p{javaLowerCase}+\\w+"); + private static Pattern URL_REGEX = + Pattern.compile("\\(?\\bhttps?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]"); + + private static Document hrefify(Document soup) { + return tagsoupMapText( + soup, + x -> + URL_REGEX + .matcher(x) + .replaceAll( + match -> { + var s = match.group(); + var leftParen = s.startsWith("("); + var rightParen = s.endsWith(")"); + var url = + s.substring(leftParen ? 1 : 0, rightParen ? s.length() - 1 : s.length()); + return String.format( + "%s<a href=\"%s\" class=\"benkiautohref\">%s</a>%s", + leftParen ? "(" : "", url, url, rightParen ? ")" : ""); + })); + } + + private static Document unhrefify(Document soup) { + soup.select(".benkiautohref").unwrap(); + return soup; + } + + private static Document wikilinkify(Document soup) { + return tagsoupMapText( + soup, + x -> + WIKIWORD_REGEX + .matcher(x) + .replaceAll( + match -> + String.format( + "<a href=\"/wiki/%s\" class=\"benkilink\">%s</a>", + match.group(), match.group()))); + } + + private static Document unwikilinkify(Document soup) { + soup.select(".benkilink").unwrap(); + return soup; + } } |