summaryrefslogtreecommitdiff
path: root/third-party/s-xml/src
diff options
context:
space:
mode:
authorMatthias Benkard <code@mail.matthias.benkard.de>2009-10-30 20:52:07 +0100
committerMatthias Benkard <code@mail.matthias.benkard.de>2009-10-30 20:52:07 +0100
commitddb83b1fb2d305e0c06fc067d82d6bab5458b0fd (patch)
tree8f9003926f0b5295c7a04b2ca257c0a2155ce841 /third-party/s-xml/src
parent15937a1a4f1cf40bc55aa34eb71c67b88466ff57 (diff)
Add third-party XML processing libraries.
Ignore-this: 5ca28497555bf944858ca2f58bc8a62b darcs-hash:a0b0f9baa7c9b1259e755435db1fb17123630a6c
Diffstat (limited to 'third-party/s-xml/src')
-rw-r--r--third-party/s-xml/src/dom.lisp75
-rw-r--r--third-party/s-xml/src/lxml-dom.lisp83
-rw-r--r--third-party/s-xml/src/package.lisp46
-rw-r--r--third-party/s-xml/src/sxml-dom.lisp76
-rw-r--r--third-party/s-xml/src/xml-struct-dom.lisp125
-rw-r--r--third-party/s-xml/src/xml.lisp700
6 files changed, 1105 insertions, 0 deletions
diff --git a/third-party/s-xml/src/dom.lisp b/third-party/s-xml/src/dom.lisp
new file mode 100644
index 0000000..dcf6e82
--- /dev/null
+++ b/third-party/s-xml/src/dom.lisp
@@ -0,0 +1,75 @@
+;;;; -*- mode: lisp -*-
+;;;;
+;;;; $Id: dom.lisp,v 1.2 2005-08-29 15:01:47 scaekenberghe Exp $
+;;;;
+;;;; This is the generic simple DOM parser and printer interface.
+;;;;
+;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;;
+;;;; You are granted the rights to distribute and use this software
+;;;; as governed by the terms of the Lisp Lesser General Public License
+;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
+
+(in-package :s-xml)
+
+;;; top level DOM parser interface
+
+(defgeneric parse-xml-dom (stream output-type)
+ (:documentation "Parse a character stream as XML and generate a DOM of output-type"))
+
+(defun parse-xml (stream &key (output-type :lxml))
+ "Parse a character stream as XML and generate a DOM of output-type, defaulting to :lxml"
+ (parse-xml-dom stream output-type))
+
+(defun parse-xml-string (string &key (output-type :lxml))
+ "Parse a string as XML and generate a DOM of output-type, defaulting to :lxml"
+ (with-input-from-string (stream string)
+ (parse-xml-dom stream output-type)))
+
+(defun parse-xml-file (filename &key (output-type :lxml))
+ "Parse a character file as XML and generate a DOM of output-type, defaulting to :lxml"
+ (with-open-file (in filename :direction :input)
+ (parse-xml-dom in output-type)))
+
+;;; top level DOM printer interface
+
+(defgeneric print-xml-dom (dom input-type stream pretty level)
+ (:documentation "Generate XML output on a character stream from a DOM of input-type, optionally pretty printing using level"))
+
+(defun print-xml (dom &key (stream t) (pretty nil) (input-type :lxml) (header))
+ "Generate XML output on a character stream (t by default) from a DOM of input-type (:lxml by default), optionally pretty printing (off by default), or adding a header (none by default)"
+ (when header (format stream header))
+ (when pretty (terpri stream))
+ (print-xml-dom dom input-type stream pretty 1))
+
+(defun print-xml-string (dom &key (pretty nil) (input-type :lxml))
+ "Generate XML output to a string from a DOM of input-type (:lxml by default), optionally pretty printing (off by default)"
+ (with-output-to-string (stream)
+ (print-xml dom :stream stream :pretty pretty :input-type input-type)))
+
+;;; shared/common support functions
+
+(defun print-spaces (n stream &optional (preceding-newline t))
+ (when preceding-newline
+ (terpri stream))
+ (loop :repeat n
+ :do (write-char #\Space stream)))
+
+(defun print-solitary-tag (tag stream)
+ (write-char #\< stream)
+ (print-identifier tag stream)
+ (write-string "/>" stream))
+
+(defun print-closing-tag (tag stream)
+ (write-string "</" stream)
+ (print-identifier tag stream)
+ (write-char #\> stream))
+
+(defun print-attribute (name value stream)
+ (write-char #\space stream)
+ (print-identifier name stream t)
+ (write-string "=\"" stream)
+ (print-string-xml value stream)
+ (write-char #\" stream))
+
+;;;; eof
diff --git a/third-party/s-xml/src/lxml-dom.lisp b/third-party/s-xml/src/lxml-dom.lisp
new file mode 100644
index 0000000..449fea3
--- /dev/null
+++ b/third-party/s-xml/src/lxml-dom.lisp
@@ -0,0 +1,83 @@
+;;;; -*- mode: lisp -*-
+;;;;
+;;;; $Id: lxml-dom.lisp,v 1.6 2005-11-20 14:34:15 scaekenberghe Exp $
+;;;;
+;;;; LXML implementation of the generic DOM parser and printer.
+;;;;
+;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;;
+;;;; You are granted the rights to distribute and use this software
+;;;; as governed by the terms of the Lisp Lesser General Public License
+;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
+
+(in-package :s-xml)
+
+;;; the lxml hooks to generate lxml
+
+(defun lxml-new-element-hook (name attributes seed)
+ (declare (ignore name attributes seed))
+ '())
+
+(defun lxml-finish-element-hook (name attributes parent-seed seed)
+ (let ((xml-element
+ (cond ((and (null seed) (null attributes))
+ name)
+ (attributes
+ `((,name ,@(let (list)
+ (dolist (attribute attributes list)
+ (push (cdr attribute) list)
+ (push (car attribute) list))))
+ ,@(nreverse seed)))
+ (t
+ `(,name ,@(nreverse seed))))))
+ (cons xml-element parent-seed)))
+
+(defun lxml-text-hook (string seed)
+ (cons string seed))
+
+;;; standard DOM interfaces
+
+(defmethod parse-xml-dom (stream (output-type (eql :lxml)))
+ (car (start-parse-xml stream
+ (make-instance 'xml-parser-state
+ :new-element-hook #'lxml-new-element-hook
+ :finish-element-hook #'lxml-finish-element-hook
+ :text-hook #'lxml-text-hook))))
+
+(defun plist->alist (plist)
+ (when plist
+ (cons (cons (first plist) (second plist))
+ (plist->alist (rest (rest plist))))))
+
+(defmethod print-xml-dom (dom (input-type (eql :lxml)) stream pretty level)
+ (declare (special *namespaces*))
+ (cond ((symbolp dom) (print-solitary-tag dom stream))
+ ((stringp dom) (print-string-xml dom stream))
+ ((consp dom)
+ (let (tag attributes)
+ (cond ((symbolp (first dom)) (setf tag (first dom)))
+ ((consp (first dom)) (setf tag (first (first dom))
+ attributes (plist->alist (rest (first dom)))))
+ (t (error "Input not recognized as LXML ~s" dom)))
+ (let ((*namespaces* (extend-namespaces attributes *namespaces*)))
+ (write-char #\< stream)
+ (print-identifier tag stream)
+ (loop :for (name . value) :in attributes
+ :do (print-attribute name value stream))
+ (if (rest dom)
+ (let ((children (rest dom)))
+ (write-char #\> stream)
+ (if (and (= (length children) 1) (stringp (first children)))
+ (print-string-xml (first children) stream)
+ (progn
+ (dolist (child children)
+ (when pretty (print-spaces (* 2 level) stream))
+ (if (stringp child)
+ (print-string-xml child stream)
+ (print-xml-dom child input-type stream pretty (1+ level))))
+ (when pretty (print-spaces (* 2 (1- level)) stream))))
+ (print-closing-tag tag stream))
+ (write-string "/>" stream)))))
+ (t (error "Input not recognized as LXML ~s" dom))))
+
+;;;; eof \ No newline at end of file
diff --git a/third-party/s-xml/src/package.lisp b/third-party/s-xml/src/package.lisp
new file mode 100644
index 0000000..1fc0cca
--- /dev/null
+++ b/third-party/s-xml/src/package.lisp
@@ -0,0 +1,46 @@
+;;;; -*- mode: lisp -*-
+;;;;
+;;;; $Id: package.lisp,v 1.8 2006-01-31 11:44:15 scaekenberghe Exp $
+;;;;
+;;;; This is a Common Lisp implementation of a very basic XML parser.
+;;;; The parser is non-validating.
+;;;; The API into the parser is pure functional parser hook model that comes from SSAX,
+;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net
+;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one.
+;;;;
+;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;;
+;;;; You are granted the rights to distribute and use this software
+;;;; as governed by the terms of the Lisp Lesser General Public License
+;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
+
+(defpackage s-xml
+ (:use common-lisp)
+ (:export
+ ;; main parser interface
+ #:start-parse-xml
+ #:print-string-xml
+ #:xml-parser-error #:xml-parser-error-message #:xml-parser-error-args #:xml-parser-error-stream
+ #:xml-parser-state #:get-entities #:get-seed
+ #:get-new-element-hook #:get-finish-element-hook #:get-text-hook
+ ;; callbacks
+ #:*attribute-name-parser*
+ #:*attribute-value-parser*
+ #:parse-attribute-name
+ #:parse-attribute-value
+ ;; dom parser and printer
+ #:parse-xml-dom #:parse-xml #:parse-xml-string #:parse-xml-file
+ #:print-xml-dom #:print-xml #:print-xml-string
+ ;; xml-element structure
+ #:make-xml-element #:xml-element-children #:xml-element-name
+ #:xml-element-attribute #:xml-element-attributes
+ #:xml-element-p #:new-xml-element #:first-xml-element-child
+ ;; namespaces
+ #:*ignore-namespaces* #:*local-namespace* #:*namespaces*
+ #:*require-existing-symbols* #:*auto-export-symbols* #:*auto-create-namespace-packages*
+ #:find-namespace #:register-namespace #:get-prefix #:get-uri #:get-package
+ #:resolve-identifier #:extend-namespaces #:print-identifier #:split-identifier)
+ (:documentation
+ "A simple XML parser with an efficient, purely functional, event-based interface as well as a DOM interface"))
+
+;;;; eof
diff --git a/third-party/s-xml/src/sxml-dom.lisp b/third-party/s-xml/src/sxml-dom.lisp
new file mode 100644
index 0000000..dee3de8
--- /dev/null
+++ b/third-party/s-xml/src/sxml-dom.lisp
@@ -0,0 +1,76 @@
+;;;; -*- mode: lisp -*-
+;;;;
+;;;; $Id: sxml-dom.lisp,v 1.5 2005-11-20 14:34:15 scaekenberghe Exp $
+;;;;
+;;;; LXML implementation of the generic DOM parser and printer.
+;;;;
+;;;; Copyright (C) 2003, 2004 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;;
+;;;; You are granted the rights to distribute and use this software
+;;;; as governed by the terms of the Lisp Lesser General Public License
+;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
+
+(in-package :s-xml)
+
+;;; the sxml hooks to generate sxml
+
+(defun sxml-new-element-hook (name attributes seed)
+ (declare (ignore name attributes seed))
+ '())
+
+(defun sxml-finish-element-hook (name attributes parent-seed seed)
+ (let ((xml-element (append (list name)
+ (when attributes
+ (list (let (list)
+ (dolist (attribute attributes (cons :@ list))
+ (push (list (car attribute) (cdr attribute)) list)))))
+ (nreverse seed))))
+ (cons xml-element parent-seed)))
+
+(defun sxml-text-hook (string seed)
+ (cons string seed))
+
+;;; the standard DOM interfaces
+
+(defmethod parse-xml-dom (stream (output-type (eql :sxml)))
+ (car (start-parse-xml stream
+ (make-instance 'xml-parser-state
+ :new-element-hook #'sxml-new-element-hook
+ :finish-element-hook #'sxml-finish-element-hook
+ :text-hook #'sxml-text-hook))))
+
+(defmethod print-xml-dom (dom (input-type (eql :sxml)) stream pretty level)
+ (declare (special *namespaces*))
+ (cond ((stringp dom) (print-string-xml dom stream))
+ ((consp dom)
+ (let ((tag (first dom))
+ attributes
+ children)
+ (if (and (consp (second dom)) (eq (first (second dom)) :@))
+ (setf attributes (rest (second dom))
+ children (rest (rest dom)))
+ (setf children (rest dom)))
+ (let ((*namespaces* (extend-namespaces (loop :for (name value) :in attributes
+ :collect (cons name value))
+ *namespaces*)))
+ (write-char #\< stream)
+ (print-identifier tag stream)
+ (loop :for (name value) :in attributes
+ :do (print-attribute name value stream))
+ (if children
+ (progn
+ (write-char #\> stream)
+ (if (and (= (length children) 1) (stringp (first children)))
+ (print-string-xml (first children) stream)
+ (progn
+ (dolist (child children)
+ (when pretty (print-spaces (* 2 level) stream))
+ (if (stringp child)
+ (print-string-xml child stream)
+ (print-xml-dom child input-type stream pretty (1+ level))))
+ (when pretty (print-spaces (* 2 (1- level)) stream))))
+ (print-closing-tag tag stream))
+ (write-string "/>" stream)))))
+ (t (error "Input not recognized as SXML ~s" dom))))
+
+;;;; eof
diff --git a/third-party/s-xml/src/xml-struct-dom.lisp b/third-party/s-xml/src/xml-struct-dom.lisp
new file mode 100644
index 0000000..916f747
--- /dev/null
+++ b/third-party/s-xml/src/xml-struct-dom.lisp
@@ -0,0 +1,125 @@
+;;;; -*- mode: lisp -*-
+;;;;
+;;;; $Id: xml-struct-dom.lisp,v 1.3 2005-09-20 09:57:48 scaekenberghe Exp $
+;;;;
+;;;; XML-STRUCT implementation of the generic DOM parser and printer.
+;;;;
+;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;;
+;;;; You are granted the rights to distribute and use this software
+;;;; as governed by the terms of the Lisp Lesser General Public License
+;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
+
+(in-package :s-xml)
+
+;;; xml-element struct datastructure and API
+
+(defstruct xml-element
+ name ; :tag-name
+ attributes ; a assoc list of (:attribute-name . "attribute-value")
+ children ; a list of children/content either text strings or xml-elements
+ )
+
+(setf (documentation 'xml-element-p 'function)
+ "Return T when the argument is an xml-element struct"
+ (documentation 'xml-element-attributes 'function)
+ "Return the alist of attribute names and values dotted pairs from an xml-element struct"
+ (documentation 'xml-element-children 'function)
+ "Return the list of children from an xml-element struct"
+ (documentation 'xml-element-name 'function)
+ "Return the name from an xml-element struct"
+ (documentation 'make-xml-element 'function)
+ "Make and return a new xml-element struct")
+
+(defun xml-element-attribute (xml-element key)
+ "Return the string value of the attribute with name the keyword :key
+ of xml-element if any, return null if not found"
+ (let ((pair (assoc key (xml-element-attributes xml-element) :test #'eq)))
+ (when pair (cdr pair))))
+
+(defun (setf xml-element-attribute) (value xml-element key)
+ "Set the string value of the attribute with name the keyword :key of
+ xml-element, creating a new attribute if necessary or overwriting an
+ existing one, returning the value"
+ (let ((attributes (xml-element-attributes xml-element)))
+ (if (null attributes)
+ (push (cons key value) (xml-element-attributes xml-element))
+ (let ((pair (assoc key attributes :test #'eq)))
+ (if pair
+ (setf (cdr pair) value)
+ (push (cons key value) (xml-element-attributes xml-element)))))
+ value))
+
+(defun new-xml-element (name &rest children)
+ "Make a new xml-element with name and children"
+ (make-xml-element :name name :children children))
+
+(defun first-xml-element-child (xml-element)
+ "Get the first child of an xml-element"
+ (first (xml-element-children xml-element)))
+
+(defun xml-equal (xml-1 xml-2)
+ (and (xml-element-p xml-1)
+ (xml-element-p xml-2)
+ (eq (xml-element-name xml-1)
+ (xml-element-name xml-2))
+ (equal (xml-element-attributes xml-1)
+ (xml-element-attributes xml-2))
+ (reduce #'(lambda (&optional (x t) (y t)) (and x y))
+ (mapcar #'(lambda (x y)
+ (or (and (stringp x) (stringp y) (string= x y))
+ (xml-equal x y)))
+ (xml-element-children xml-1)
+ (xml-element-children xml-2)))))
+
+;;; printing xml structures
+
+(defmethod print-xml-dom (xml-element (input-type (eql :xml-struct)) stream pretty level)
+ (declare (special *namespaces*))
+ (let ((*namespaces* (extend-namespaces (xml-element-attributes xml-element)
+ *namespaces*)))
+ (write-char #\< stream)
+ (print-identifier (xml-element-name xml-element) stream)
+ (loop :for (name . value) :in (xml-element-attributes xml-element)
+ :do (print-attribute name value stream))
+ (let ((children (xml-element-children xml-element)))
+ (if children
+ (progn
+ (write-char #\> stream)
+ (if (and (= (length children) 1) (stringp (first children)))
+ (print-string-xml (first children) stream)
+ (progn
+ (dolist (child children)
+ (when pretty (print-spaces (* 2 level) stream))
+ (if (stringp child)
+ (print-string-xml child stream)
+ (print-xml-dom child input-type stream pretty (1+ level))))
+ (when pretty (print-spaces (* 2 (1- level)) stream))))
+ (print-closing-tag (xml-element-name xml-element) stream))
+ (write-string "/>" stream)))))
+
+;;; the standard hooks to generate xml-element structs
+
+(defun standard-new-element-hook (name attributes seed)
+ (declare (ignore name attributes seed))
+ '())
+
+(defun standard-finish-element-hook (name attributes parent-seed seed)
+ (let ((xml-element (make-xml-element :name name
+ :attributes attributes
+ :children (nreverse seed))))
+ (cons xml-element parent-seed)))
+
+(defun standard-text-hook (string seed)
+ (cons string seed))
+
+;;; top level standard parser interfaces
+
+(defmethod parse-xml-dom (stream (output-type (eql :xml-struct)))
+ (car (start-parse-xml stream
+ (make-instance 'xml-parser-state
+ :new-element-hook #'standard-new-element-hook
+ :finish-element-hook #'standard-finish-element-hook
+ :text-hook #'standard-text-hook))))
+
+;;;; eof
diff --git a/third-party/s-xml/src/xml.lisp b/third-party/s-xml/src/xml.lisp
new file mode 100644
index 0000000..8df61c6
--- /dev/null
+++ b/third-party/s-xml/src/xml.lisp
@@ -0,0 +1,700 @@
+;;;; -*- mode: lisp -*-
+;;;;
+;;;; $Id: xml.lisp,v 1.16 2006-01-31 11:44:15 scaekenberghe Exp $
+;;;;
+;;;; This is a Common Lisp implementation of a basic but usable XML parser.
+;;;; The parser is non-validating and not complete (no PI).
+;;;; Namespace and entities are handled.
+;;;; The API into the parser is a pure functional parser hook model that comes from SSAX,
+;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net
+;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one.
+;;;;
+;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;;
+;;;; You are granted the rights to distribute and use this software
+;;;; as governed by the terms of the Lisp Lesser General Public License
+;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
+
+(in-package :s-xml)
+
+;;; error reporting
+
+(define-condition xml-parser-error (error)
+ ((message :initarg :message :reader xml-parser-error-message)
+ (args :initarg :args :reader xml-parser-error-args)
+ (stream :initarg :stream :reader xml-parser-error-stream :initform nil))
+ (:report (lambda (condition stream)
+ (format stream
+ "XML parser ~?~@[ near stream position ~d~]."
+ (xml-parser-error-message condition)
+ (xml-parser-error-args condition)
+ (and (xml-parser-error-stream condition)
+ (file-position (xml-parser-error-stream condition))))))
+ (:documentation "Thrown by the XML parser to indicate errorneous input"))
+
+(setf (documentation 'xml-parser-error-message 'function)
+ "Get the message from an XML parser error"
+ (documentation 'xml-parser-error-args 'function)
+ "Get the error arguments from an XML parser error"
+ (documentation 'xml-parser-error-stream 'function)
+ "Get the stream from an XML parser error")
+
+(defun parser-error (message &optional args stream)
+ (make-condition 'xml-parser-error
+ :message message
+ :args args
+ :stream stream))
+
+;; attribute parsing hooks
+;; this is a bit complicated, refer to the mailing lists for a more detailed explanation
+
+(defun parse-attribute-name (string)
+ "Default parser for the attribute name"
+ (declare (special *namespaces*))
+ (resolve-identifier string *namespaces* t))
+
+(defun parse-attribute-value (name string)
+ "Default parser for the attribute value"
+ (declare (ignore name)
+ (special *ignore-namespace*))
+ (if *ignore-namespaces*
+ (copy-seq string)
+ string))
+
+(defparameter *attribute-name-parser* #'parse-attribute-name
+ "Called to compute interned attribute name from a buffer that will be reused")
+
+(defparameter *attribute-value-parser* #'parse-attribute-value
+ "Called to compute an element of an attribute list from a buffer that will be reused")
+
+;;; utilities
+
+(defun whitespace-char-p (char)
+ "Is char an XML whitespace character ?"
+ (declare (type character char))
+ (or (char= char #\space)
+ (char= char #\tab)
+ (char= char #\return)
+ (char= char #\linefeed)))
+
+(defun identifier-char-p (char)
+ "Is char an XML identifier character ?"
+ (declare (type character char))
+ (or (and (char<= #\A char) (char<= char #\Z))
+ (and (char<= #\a char) (char<= char #\z))
+ (and (char<= #\0 char) (char<= char #\9))
+ (char= char #\-)
+ (char= char #\_)
+ (char= char #\.)
+ (char= char #\:)))
+
+(defun skip-whitespace (stream)
+ "Skip over XML whitespace in stream, return first non-whitespace
+ character which was peeked but not read, return nil on eof"
+ (loop
+ (let ((char (peek-char nil stream nil #\Null)))
+ (declare (type character char))
+ (if (whitespace-char-p char)
+ (read-char stream)
+ (return char)))))
+
+(defun make-extendable-string (&optional (size 10))
+ "Make an extendable string which is a one-dimensional character
+ array which is adjustable and has a fill pointer"
+ (make-array size
+ :element-type 'character
+ :adjustable t
+ :fill-pointer 0))
+
+(defun print-string-xml (string stream &key (start 0) end)
+ "Write the characters of string to stream using basic XML conventions"
+ (loop for offset upfrom start below (or end (length string))
+ for char = (char string offset)
+ do (case char
+ (#\& (write-string "&amp;" stream))
+ (#\< (write-string "&lt;" stream))
+ (#\> (write-string "&gt;" stream))
+ (#\" (write-string "&quot;" stream))
+ ((#\newline #\return #\tab) (write-char char stream))
+ (t (if (and (<= 32 (char-code char))
+ (<= (char-code char) 126))
+ (write-char char stream)
+ (progn
+ (write-string "&#x" stream)
+ (write (char-code char) :stream stream :base 16)
+ (write-char #\; stream)))))))
+
+(defun make-standard-entities ()
+ "A hashtable mapping XML entity names to their replacement strings,
+ filled with the standard set"
+ (let ((entities (make-hash-table :test #'equal)))
+ (setf (gethash "amp" entities) (string #\&)
+ (gethash "quot" entities) (string #\")
+ (gethash "apos" entities) (string #\')
+ (gethash "lt" entities) (string #\<)
+ (gethash "gt" entities) (string #\>)
+ (gethash "nbsp" entities) (string #\space))
+ entities))
+
+(defun resolve-entity (stream extendable-string entities entity)
+ "Read and resolve an XML entity from stream, positioned after the '&' entity marker,
+ accepting &name; &#DEC; and &#xHEX; formats,
+ destructively modifying string, which is also returned,
+ destructively modifying entity, incorrect entity formats result in errors"
+ (declare (type (vector character) entity))
+ (loop
+ (let ((char (read-char stream nil #\Null)))
+ (declare (type character char))
+ (cond ((char= char #\Null) (error (parser-error "encountered eof before end of entity")))
+ ((char= #\; char) (return))
+ (t (vector-push-extend char entity)))))
+ (if (char= (char entity 0) #\#)
+ (let ((code (if (char= (char entity 1) #\x)
+ (parse-integer entity :start 2 :radix 16 :junk-allowed t)
+ (parse-integer entity :start 1 :radix 10 :junk-allowed t))))
+ (when (null code)
+ (error (parser-error "encountered incorrect entity &~s;" (list entity) stream)))
+ (vector-push-extend (code-char code) extendable-string))
+ (let ((value (gethash entity entities)))
+ (if value
+ (loop :for char :across value
+ :do (vector-push-extend char extendable-string))
+ (error (parser-error "encountered unknown entity &~s;" (list entity) stream)))))
+ extendable-string)
+
+;;; namespace support
+
+(defvar *ignore-namespaces* nil
+ "When t, namespaces are ignored like in the old version of S-XML")
+
+(defclass xml-namespace ()
+ ((uri :documentation "The URI used to identify this namespace"
+ :accessor get-uri
+ :initarg :uri)
+ (prefix :documentation "The preferred prefix assigned to this namespace"
+ :accessor get-prefix
+ :initarg :prefix
+ :initform nil)
+ (package :documentation "The Common Lisp package where this namespace's symbols are interned"
+ :accessor get-package
+ :initarg :package
+ :initform nil))
+ (:documentation "Describes an XML namespace and how it is handled"))
+
+(setf (documentation 'get-uri 'function)
+ "The URI used to identify this namespace"
+ (documentation 'get-prefix 'function)
+ "The preferred prefix assigned to this namespace"
+ (documentation 'get-package 'function)
+ "The Common Lisp package where this namespace's symbols are interned")
+
+(defmethod print-object ((object xml-namespace) stream)
+ (print-unreadable-object (object stream :type t :identity t)
+ (format stream "~A - ~A" (get-prefix object) (get-uri object))))
+
+(defvar *local-namespace* (make-instance 'xml-namespace
+ :uri "local"
+ :prefix ""
+ :package (find-package :keyword))
+ "The local (global default) XML namespace")
+
+(defvar *xml-namespace* (make-instance 'xml-namespace
+ :uri "http://www.w3.org/XML/1998/namespace"
+ :prefix "xml"
+ :package (or (find-package :xml)
+ (make-package :xml :nicknames '("XML"))))
+ "REC-xml-names-19990114 says the prefix xml is bound to the namespace http://www.w3.org/XML/1998/namespace.")
+
+(defvar *known-namespaces* (list *local-namespace* *xml-namespace*)
+ "The list of known/defined namespaces")
+
+(defvar *namespaces* `(("xml" . ,*xml-namespace*) ("" . ,*local-namespace*))
+ "Ordered list of (prefix . XML-namespace) bindings currently in effect - special variable")
+
+(defun find-namespace (uri)
+ "Find a registered XML namespace identified by uri"
+ (find uri *known-namespaces* :key #'get-uri :test #'string-equal))
+
+(defun register-namespace (uri prefix package)
+ "Register a new or redefine an existing XML namespace defined by uri with prefix and package"
+ (let ((namespace (find-namespace uri)))
+ (if namespace
+ (setf (get-prefix namespace) prefix
+ (get-package namespace) (find-package package))
+ (push (setf namespace (make-instance 'xml-namespace
+ :uri uri
+ :prefix prefix
+ :package (find-package package)))
+ *known-namespaces*))
+ namespace))
+
+(defun find-namespace-binding (prefix namespaces)
+ "Find the XML namespace currently bound to prefix in the namespaces bindings"
+ (cdr (assoc prefix namespaces :test #'string-equal)))
+
+(defun split-identifier (identifier)
+ "Split an identifier 'prefix:name' and return (values prefix name)"
+ (when (symbolp identifier)
+ (setf identifier (symbol-name identifier)))
+ (let ((colon-position (position #\: identifier :test #'char=)))
+ (if colon-position
+ (values (subseq identifier 0 colon-position)
+ (subseq identifier (1+ colon-position)))
+ (values nil identifier))))
+
+(defvar *require-existing-symbols* nil
+ "If t, each XML identifier must exist as symbol already")
+
+(defvar *auto-export-symbols* t
+ "If t, export newly interned symbols form their packages")
+
+(defun resolve-identifier (identifier namespaces &optional as-attribute)
+ "Resolve the string identifier in the list of namespace bindings"
+ (if *ignore-namespaces*
+ (intern identifier :keyword)
+ (flet ((intern-symbol (string package) ; intern string as a symbol in package
+ (if *require-existing-symbols*
+ (let ((symbol (find-symbol string package)))
+ (or symbol
+ (error "Symbol ~s does not exist in ~s" string package)))
+ (let ((symbol (intern string package)))
+ (when (and *auto-export-symbols*
+ (not (eql package (find-package :keyword))))
+ (export symbol package))
+ symbol))))
+ (multiple-value-bind (prefix name)
+ (split-identifier identifier)
+ (if (or (null prefix) (string= prefix "xmlns"))
+ (if as-attribute
+ (intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*))
+ (let ((default-namespace (find-namespace-binding "" namespaces)))
+ (intern-symbol name (get-package default-namespace))))
+ (let ((namespace (find-namespace-binding prefix namespaces)))
+ (if namespace
+ (intern-symbol name (get-package namespace))
+ (error "namespace not found for prefix ~s" prefix))))))))
+
+(defvar *auto-create-namespace-packages* t
+ "If t, new packages will be created for namespaces, if needed, named by the prefix")
+
+(defun new-namespace (uri &optional prefix)
+ "Register a new namespace for uri and prefix, creating a package if necessary"
+ (if prefix
+ (register-namespace uri
+ prefix
+ (or (find-package prefix)
+ (if *auto-create-namespace-packages*
+ (make-package prefix :nicknames `(,(string-upcase prefix)))
+ (error "Cannot find or create package ~s" prefix))))
+ (let ((unique-name (loop :for i :upfrom 0
+ :do (let ((name (format nil "ns-~d" i)))
+ (when (not (find-package name))
+ (return name))))))
+ (register-namespace uri
+ unique-name
+ (if *auto-create-namespace-packages*
+ (make-package (string-upcase unique-name) :nicknames `(,unique-name))
+ (error "Cannot create package ~s" unique-name))))))
+
+(defun extend-namespaces (attributes namespaces)
+ "Given possible 'xmlns[:prefix]' attributes, extend the namespaces bindings"
+ (unless *ignore-namespaces*
+ (let (default-namespace-uri)
+ (loop :for (key . value) :in attributes
+ :do (if (string= key "xmlns")
+ (setf default-namespace-uri value)
+ (multiple-value-bind (prefix name)
+ (split-identifier key)
+ (when (string= prefix "xmlns")
+ (let* ((uri value)
+ (prefix name)
+ (namespace (find-namespace uri)))
+ (unless namespace
+ (setf namespace (new-namespace uri prefix)))
+ (push `(,prefix . ,namespace) namespaces))))))
+ (when default-namespace-uri
+ (let ((namespace (find-namespace default-namespace-uri)))
+ (unless namespace
+ (setf namespace (new-namespace default-namespace-uri)))
+ (push `("" . ,namespace) namespaces)))))
+ namespaces)
+
+(defun print-identifier (identifier stream &optional as-attribute)
+ "Print identifier on stream using namespace conventions"
+ (declare (ignore as-attribute) (special *namespaces*))
+ (if *ignore-namespaces*
+ (princ identifier stream)
+ (if (symbolp identifier)
+ (let ((package (symbol-package identifier))
+ (name (symbol-name identifier)))
+ (let* ((namespace (find package *known-namespaces* :key #'get-package))
+ (prefix (or (car (find namespace *namespaces* :key #'cdr))
+ (get-prefix namespace))))
+ (if (string= prefix "")
+ (princ name stream)
+ (format stream "~a:~a" prefix name))))
+ (princ identifier stream))))
+
+;;; the parser state
+
+(defclass xml-parser-state ()
+ ((entities :documentation "A hashtable mapping XML entity names to their replacement stings"
+ :accessor get-entities
+ :initarg :entities
+ :initform (make-standard-entities))
+ (seed :documentation "The user seed object"
+ :accessor get-seed
+ :initarg :seed
+ :initform nil)
+ (buffer :documentation "The main reusable character buffer"
+ :accessor get-buffer
+ :initform (make-extendable-string))
+ (mini-buffer :documentation "The secondary, smaller reusable character buffer"
+ :accessor get-mini-buffer
+ :initform (make-extendable-string))
+ (new-element-hook :documentation "Called when new element starts"
+ ;; Handle the start of a new xml element with name and attributes,
+ ;; receiving seed from previous element (sibling or parent)
+ ;; return seed to be used for first child (content)
+ ;; or directly to finish-element-hook
+ :accessor get-new-element-hook
+ :initarg :new-element-hook
+ :initform #'(lambda (name attributes seed)
+ (declare (ignore name attributes))
+ seed))
+ (finish-element-hook :documentation "Called when element ends"
+ ;; Handle the end of an xml element with name and attributes,
+ ;; receiving parent-seed, the seed passed to us when this element started,
+ ;; i.e. passed to our corresponding new-element-hook
+ ;; and receiving seed from last child (content)
+ ;; or directly from new-element-hook
+ ;; return final seed for this element to next element (sibling or parent)
+ :accessor get-finish-element-hook
+ :initarg :finish-element-hook
+ :initform #'(lambda (name attributes parent-seed seed)
+ (declare (ignore name attributes parent-seed))
+ seed))
+ (text-hook :documentation "Called when text is found"
+ ;; Handle text in string, found as contents,
+ ;; receiving seed from previous element (sibling or parent),
+ ;; return final seed for this element to next element (sibling or parent)
+ :accessor get-text-hook
+ :initarg :text-hook
+ :initform #'(lambda (string seed)
+ (declare (ignore string))
+ seed)))
+ (:documentation "The XML parser state passed along all code making up the parser"))
+
+(setf (documentation 'get-seed 'function)
+ "Get the initial user seed of an XML parser state"
+ (documentation 'get-entities 'function)
+ "Get the entities hashtable of an XML parser state"
+ (documentation 'get-new-element-hook 'function)
+ "Get the new element hook of an XML parser state"
+ (documentation 'get-finish-element-hook 'function)
+ "Get the finish element hook of an XML parser state"
+ (documentation 'get-text-hook 'function)
+ "Get the text hook of an XML parser state")
+
+#-allegro
+(setf (documentation '(setf get-seed) 'function)
+ "Set the initial user seed of an XML parser state"
+ (documentation '(setf get-entities) 'function)
+ "Set the entities hashtable of an XML parser state"
+ (documentation '(setf get-new-element-hook) 'function)
+ "Set the new element hook of an XML parser state"
+ (documentation '(setf get-finish-element-hook) 'function)
+ "Set the finish element hook of an XML parser state"
+ (documentation '(setf get-text-hook) 'function)
+ "Set the text hook of an XML parser state")
+
+(defmethod get-mini-buffer :after ((state xml-parser-state))
+ "Reset and return the reusable mini buffer"
+ (with-slots (mini-buffer) state
+ (setf (fill-pointer mini-buffer) 0)))
+
+(defmethod get-buffer :after ((state xml-parser-state))
+ "Reset and return the main reusable buffer"
+ (with-slots (buffer) state
+ (setf (fill-pointer buffer) 0)))
+
+;;; parser support
+
+(defun parse-whitespace (stream extendable-string)
+ "Read and collect XML whitespace from stream in string which is
+ destructively modified, return first non-whitespace character which
+ was peeked but not read, return #\Null on eof"
+ (declare (type (vector character) extendable-string))
+ (loop
+ (let ((char (peek-char nil stream nil #\Null)))
+ (declare (type character char))
+ (if (whitespace-char-p char)
+ (vector-push-extend (read-char stream) extendable-string)
+ (return char)))))
+
+(defun parse-string (stream state string)
+ "Read and return an XML string from stream, delimited by either
+ single or double quotes, the stream is expected to be on the opening
+ delimiter, at the end the closing delimiter is also read, entities
+ are resolved, eof before end of string is an error"
+ (declare (type (vector character) string))
+ (let ((delimiter (read-char stream nil #\Null))
+ (char #\Null))
+ (declare (type character delimiter char))
+ (unless (or (char= delimiter #\') (char= delimiter #\"))
+ (error (parser-error "expected string delimiter" nil stream)))
+ (loop
+ (setf char (read-char stream nil #\Null))
+ (cond ((char= char #\Null) (error (parser-error "encountered eof before end of string")))
+ ((char= char delimiter) (return))
+ ((char= char #\&) (resolve-entity stream string (get-entities state) (get-mini-buffer state)))
+ (t (vector-push-extend char string))))
+ string))
+
+(defun parse-text (stream state extendable-string)
+ "Read and collect XML text from stream in string which is
+ destructively modified, the text ends with a '<', which is peeked and
+ returned, entities are resolved, eof is considered an error"
+ (declare (type (vector character) extendable-string))
+ (let ((char #\Null))
+ (declare (type character char))
+ (loop
+ (setf char (peek-char nil stream nil #\Null))
+ (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text")))
+ (when (char= char #\<) (return))
+ (read-char stream)
+ (if (char= char #\&)
+ (resolve-entity stream extendable-string (get-entities state) (get-mini-buffer state))
+ (vector-push-extend char extendable-string)))
+ char))
+
+(defun parse-identifier (stream identifier)
+ "Read and returns an XML identifier from stream, positioned at the
+ start of the identifier, ending with the first non-identifier
+ character, which is peeked, the identifier is written destructively
+ into identifier which is also returned"
+ (declare (type (vector character) identifier))
+ (loop
+ (let ((char (read-char stream nil #\Null)))
+ (declare (type character char))
+ (cond ((identifier-char-p char)
+ (vector-push-extend char identifier))
+ (t
+ (when (char/= char #\Null) (unread-char char stream))
+ (return identifier))))))
+
+(defun skip-comment (stream)
+ "Skip an XML comment in stream, positioned after the opening '<!--',
+ consumes the closing '-->' sequence, unexpected eof or a malformed
+ closing sequence result in a error"
+ (let ((dashes-to-read 2))
+ (loop
+ (if (zerop dashes-to-read) (return))
+ (let ((char (read-char stream nil #\Null)))
+ (declare (type character char))
+ (if (char= char #\Null)
+ (error (parser-error "encountered unexpected eof for comment")))
+ (if (char= char #\-)
+ (decf dashes-to-read)
+ (setf dashes-to-read 2)))))
+ (if (char/= (read-char stream nil #\Null) #\>)
+ (error (parser-error "expected > ending comment" nil stream))))
+
+(defun read-cdata (stream state string)
+ "Reads in the CDATA and calls the callback for CDATA if it exists"
+ ;; we already read the <![CDATA[ stuff
+ ;; continue to read until we hit ]]>
+ (let ((char #\space)
+ (last-3-characters (list #\[ #\A #\T))
+ (pattern (list #\> #\] #\])))
+ (declare (type character char))
+ (loop
+ (setf char (read-char stream nil #\Null))
+ (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text")))
+ (push char last-3-characters)
+ (setf (cdddr last-3-characters) nil)
+ (cond
+ ((equal last-3-characters
+ pattern)
+ (setf (fill-pointer string)
+ (- (fill-pointer string) 2))
+ (setf (get-seed state)
+ (funcall (get-text-hook state)
+ (copy-seq string)
+ (get-seed state)))
+ (return-from read-cdata))
+ (t
+ (vector-push-extend char string))))))
+
+(defun skip-special-tag (stream state)
+ "Skip an XML special tag (comments and processing instructions) in
+ stream, positioned after the opening '<', unexpected eof is an error"
+ ;; opening < has been read, consume ? or !
+ (read-char stream)
+ (let ((char (read-char stream nil #\Null)))
+ (declare (type character char))
+ ;; see if we are dealing with a comment
+ (when (char= char #\-)
+ (setf char (read-char stream nil #\Null))
+ (when (char= char #\-)
+ (skip-comment stream)
+ (return-from skip-special-tag)))
+ ;; maybe we are dealing with CDATA?
+ (when (and (char= char #\[)
+ (loop :for pattern :across "CDATA["
+ :for char = (read-char stream nil #\Null)
+ :when (char= char #\Null) :do
+ (error (parser-error "encountered unexpected eof in cdata"))
+ :always (char= char pattern)))
+ (read-cdata stream state (get-buffer state))
+ (return-from skip-special-tag))
+ ;; loop over chars, dealing with strings (skipping their content)
+ ;; and counting opening and closing < and > chars
+ (let ((taglevel 1)
+ (string-delimiter #\Null))
+ (declare (type character string-delimiter))
+ (loop
+ (when (zerop taglevel) (return))
+ (setf char (read-char stream nil #\Null))
+ (when (char= char #\Null)
+ (error (parser-error "encountered unexpected eof for special (! or ?) tag" nil stream)))
+ (if (char/= string-delimiter #\Null)
+ ;; inside a string we only look for a closing string delimiter
+ (when (char= char string-delimiter)
+ (setf string-delimiter #\Null))
+ ;; outside a string we count < and > and watch out for strings
+ (cond ((or (char= char #\') (char= char #\")) (setf string-delimiter char))
+ ((char= char #\<) (incf taglevel))
+ ((char= char #\>) (decf taglevel))))))))
+
+;;; the XML parser proper
+
+(defun parse-xml-element-attributes (stream state)
+ "Parse XML element attributes from stream positioned after the tag
+ identifier, returning the attributes as an assoc list, ending at
+ either a '>' or a '/' which is peeked and also returned"
+ (declare (special *namespaces*))
+ (let ((char #\Null) attributes)
+ (declare (type character char))
+ (loop
+ ;; skip whitespace separating items
+ (setf char (skip-whitespace stream))
+ ;; start tag attributes ends with > or />
+ (when (or (char= char #\>) (char= char #\/)) (return))
+ ;; read the attribute key
+ (let ((key (let ((string (parse-identifier stream (get-mini-buffer state))))
+ (if *ignore-namespaces*
+ (funcall *attribute-name-parser* string)
+ (copy-seq string)))))
+ ;; skip separating whitespace
+ (setf char (skip-whitespace stream))
+ ;; require = sign (and consume it if present)
+ (if (char= char #\=)
+ (read-char stream)
+ (error (parser-error "expected =" nil stream)))
+ ;; skip separating whitespace
+ (skip-whitespace stream)
+ ;; read the attribute value as a string
+ (push (cons key (let ((string (parse-string stream state (get-buffer state))))
+ (if *ignore-namespaces*
+ (funcall *attribute-value-parser* key string)
+ (copy-seq string))))
+ attributes)))
+ ;; return attributes peek char ending loop
+ (values attributes char)))
+
+(defun parse-xml-element (stream state)
+ "Parse and return an XML element from stream, positioned after the opening '<'"
+ (declare (special *namespaces*))
+ ;; opening < has been read
+ (when (char= (peek-char nil stream nil #\Null) #\!)
+ (skip-special-tag stream state)
+ (return-from parse-xml-element))
+ (let ((char #\Null) buffer open-tag parent-seed has-children)
+ (declare (type character char))
+ (setf parent-seed (get-seed state))
+ ;; read tag name (no whitespace between < and name ?)
+ (setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state))))
+ ;; tag has been read, read attributes if any
+ (multiple-value-bind (attributes peeked-char)
+ (parse-xml-element-attributes stream state)
+ (let ((*namespaces* (extend-namespaces attributes *namespaces*)))
+ (setf open-tag (resolve-identifier open-tag *namespaces*))
+ (unless *ignore-namespaces*
+ (dolist (attribute attributes)
+ (setf (car attribute) (funcall *attribute-name-parser* (car attribute))
+ (cdr attribute) (funcall *attribute-value-parser* (car attribute) (cdr attribute)))))
+ (setf (get-seed state) (funcall (get-new-element-hook state)
+ open-tag attributes (get-seed state)))
+ (setf char peeked-char)
+ (when (char= char #\/)
+ ;; handle solitary tag of the form <tag .. />
+ (read-char stream)
+ (setf char (read-char stream nil #\Null))
+ (if (char= #\> char)
+ (progn
+ (setf (get-seed state) (funcall (get-finish-element-hook state)
+ open-tag attributes parent-seed (get-seed state)))
+ (return-from parse-xml-element))
+ (error (parser-error "expected >" nil stream))))
+ ;; consume >
+ (read-char stream)
+ (loop
+ (setf buffer (get-buffer state))
+ ;; read whitespace into buffer
+ (setf char (parse-whitespace stream buffer))
+ ;; see what ended the whitespace scan
+ (cond ((char= char #\Null) (error (parser-error "encountered unexpected eof handling ~a"
+ (list open-tag))))
+ ((char= char #\<)
+ ;; consume the <
+ (read-char stream)
+ (if (char= (peek-char nil stream nil #\Null) #\/)
+ (progn
+ ;; handle the matching closing tag </tag> and done
+ ;; if we read whitespace as this (leaf) element's contents, it is significant
+ (when (and (not has-children) (plusp (length buffer)))
+ (setf (get-seed state) (funcall (get-text-hook state)
+ (copy-seq buffer) (get-seed state))))
+ (read-char stream)
+ (let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state))
+ *namespaces*)))
+ (unless (eq open-tag close-tag)
+ (error (parser-error "found <~a> not matched by </~a> but by <~a>"
+ (list open-tag open-tag close-tag) stream)))
+ (unless (char= (read-char stream nil #\Null) #\>)
+ (error (parser-error "expected >" nil stream)))
+ (setf (get-seed state) (funcall (get-finish-element-hook state)
+ open-tag attributes parent-seed (get-seed state))))
+ (return))
+ ;; handle child tag and loop, no hooks to call here
+ ;; whitespace between child elements is skipped
+ (progn
+ (setf has-children t)
+ (parse-xml-element stream state))))
+ (t
+ ;; no child tag, concatenate text to whitespace in buffer
+ ;; handle text content and loop
+ (setf char (parse-text stream state buffer))
+ (setf (get-seed state) (funcall (get-text-hook state)
+ (copy-seq buffer) (get-seed state))))))))))
+
+(defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state)))
+ "Parse and return a toplevel XML element from stream, using parser state"
+ (loop
+ (let ((char (skip-whitespace stream)))
+ (when (char= char #\Null) (return-from start-parse-xml))
+ ;; skip whitespace until start tag
+ (unless (char= char #\<)
+ (error (parser-error "expected <" nil stream)))
+ (read-char stream) ; consume peeked char
+ (setf char (peek-char nil stream nil #\Null))
+ (if (or (char= char #\!) (char= char #\?))
+ ;; deal with special tags
+ (skip-special-tag stream state)
+ (progn
+ ;; read the main element
+ (parse-xml-element stream state)
+ (return-from start-parse-xml (get-seed state)))))))
+
+;;;; eof