diff options
author | Matthias Benkard <code@mail.matthias.benkard.de> | 2009-10-30 20:52:07 +0100 |
---|---|---|
committer | Matthias Benkard <code@mail.matthias.benkard.de> | 2009-10-30 20:52:07 +0100 |
commit | ddb83b1fb2d305e0c06fc067d82d6bab5458b0fd (patch) | |
tree | 8f9003926f0b5295c7a04b2ca257c0a2155ce841 /third-party/s-xml/src | |
parent | 15937a1a4f1cf40bc55aa34eb71c67b88466ff57 (diff) |
Add third-party XML processing libraries.
Ignore-this: 5ca28497555bf944858ca2f58bc8a62b
darcs-hash:a0b0f9baa7c9b1259e755435db1fb17123630a6c
Diffstat (limited to 'third-party/s-xml/src')
-rw-r--r-- | third-party/s-xml/src/dom.lisp | 75 | ||||
-rw-r--r-- | third-party/s-xml/src/lxml-dom.lisp | 83 | ||||
-rw-r--r-- | third-party/s-xml/src/package.lisp | 46 | ||||
-rw-r--r-- | third-party/s-xml/src/sxml-dom.lisp | 76 | ||||
-rw-r--r-- | third-party/s-xml/src/xml-struct-dom.lisp | 125 | ||||
-rw-r--r-- | third-party/s-xml/src/xml.lisp | 700 |
6 files changed, 1105 insertions, 0 deletions
diff --git a/third-party/s-xml/src/dom.lisp b/third-party/s-xml/src/dom.lisp new file mode 100644 index 0000000..dcf6e82 --- /dev/null +++ b/third-party/s-xml/src/dom.lisp @@ -0,0 +1,75 @@ +;;;; -*- mode: lisp -*- +;;;; +;;;; $Id: dom.lisp,v 1.2 2005-08-29 15:01:47 scaekenberghe Exp $ +;;;; +;;;; This is the generic simple DOM parser and printer interface. +;;;; +;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA. +;;;; +;;;; You are granted the rights to distribute and use this software +;;;; as governed by the terms of the Lisp Lesser General Public License +;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. + +(in-package :s-xml) + +;;; top level DOM parser interface + +(defgeneric parse-xml-dom (stream output-type) + (:documentation "Parse a character stream as XML and generate a DOM of output-type")) + +(defun parse-xml (stream &key (output-type :lxml)) + "Parse a character stream as XML and generate a DOM of output-type, defaulting to :lxml" + (parse-xml-dom stream output-type)) + +(defun parse-xml-string (string &key (output-type :lxml)) + "Parse a string as XML and generate a DOM of output-type, defaulting to :lxml" + (with-input-from-string (stream string) + (parse-xml-dom stream output-type))) + +(defun parse-xml-file (filename &key (output-type :lxml)) + "Parse a character file as XML and generate a DOM of output-type, defaulting to :lxml" + (with-open-file (in filename :direction :input) + (parse-xml-dom in output-type))) + +;;; top level DOM printer interface + +(defgeneric print-xml-dom (dom input-type stream pretty level) + (:documentation "Generate XML output on a character stream from a DOM of input-type, optionally pretty printing using level")) + +(defun print-xml (dom &key (stream t) (pretty nil) (input-type :lxml) (header)) + "Generate XML output on a character stream (t by default) from a DOM of input-type (:lxml by default), optionally pretty printing (off by default), or adding a header (none by default)" + (when header (format stream header)) + (when pretty (terpri stream)) + (print-xml-dom dom input-type stream pretty 1)) + +(defun print-xml-string (dom &key (pretty nil) (input-type :lxml)) + "Generate XML output to a string from a DOM of input-type (:lxml by default), optionally pretty printing (off by default)" + (with-output-to-string (stream) + (print-xml dom :stream stream :pretty pretty :input-type input-type))) + +;;; shared/common support functions + +(defun print-spaces (n stream &optional (preceding-newline t)) + (when preceding-newline + (terpri stream)) + (loop :repeat n + :do (write-char #\Space stream))) + +(defun print-solitary-tag (tag stream) + (write-char #\< stream) + (print-identifier tag stream) + (write-string "/>" stream)) + +(defun print-closing-tag (tag stream) + (write-string "</" stream) + (print-identifier tag stream) + (write-char #\> stream)) + +(defun print-attribute (name value stream) + (write-char #\space stream) + (print-identifier name stream t) + (write-string "=\"" stream) + (print-string-xml value stream) + (write-char #\" stream)) + +;;;; eof diff --git a/third-party/s-xml/src/lxml-dom.lisp b/third-party/s-xml/src/lxml-dom.lisp new file mode 100644 index 0000000..449fea3 --- /dev/null +++ b/third-party/s-xml/src/lxml-dom.lisp @@ -0,0 +1,83 @@ +;;;; -*- mode: lisp -*- +;;;; +;;;; $Id: lxml-dom.lisp,v 1.6 2005-11-20 14:34:15 scaekenberghe Exp $ +;;;; +;;;; LXML implementation of the generic DOM parser and printer. +;;;; +;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA. +;;;; +;;;; You are granted the rights to distribute and use this software +;;;; as governed by the terms of the Lisp Lesser General Public License +;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. + +(in-package :s-xml) + +;;; the lxml hooks to generate lxml + +(defun lxml-new-element-hook (name attributes seed) + (declare (ignore name attributes seed)) + '()) + +(defun lxml-finish-element-hook (name attributes parent-seed seed) + (let ((xml-element + (cond ((and (null seed) (null attributes)) + name) + (attributes + `((,name ,@(let (list) + (dolist (attribute attributes list) + (push (cdr attribute) list) + (push (car attribute) list)))) + ,@(nreverse seed))) + (t + `(,name ,@(nreverse seed)))))) + (cons xml-element parent-seed))) + +(defun lxml-text-hook (string seed) + (cons string seed)) + +;;; standard DOM interfaces + +(defmethod parse-xml-dom (stream (output-type (eql :lxml))) + (car (start-parse-xml stream + (make-instance 'xml-parser-state + :new-element-hook #'lxml-new-element-hook + :finish-element-hook #'lxml-finish-element-hook + :text-hook #'lxml-text-hook)))) + +(defun plist->alist (plist) + (when plist + (cons (cons (first plist) (second plist)) + (plist->alist (rest (rest plist)))))) + +(defmethod print-xml-dom (dom (input-type (eql :lxml)) stream pretty level) + (declare (special *namespaces*)) + (cond ((symbolp dom) (print-solitary-tag dom stream)) + ((stringp dom) (print-string-xml dom stream)) + ((consp dom) + (let (tag attributes) + (cond ((symbolp (first dom)) (setf tag (first dom))) + ((consp (first dom)) (setf tag (first (first dom)) + attributes (plist->alist (rest (first dom))))) + (t (error "Input not recognized as LXML ~s" dom))) + (let ((*namespaces* (extend-namespaces attributes *namespaces*))) + (write-char #\< stream) + (print-identifier tag stream) + (loop :for (name . value) :in attributes + :do (print-attribute name value stream)) + (if (rest dom) + (let ((children (rest dom))) + (write-char #\> stream) + (if (and (= (length children) 1) (stringp (first children))) + (print-string-xml (first children) stream) + (progn + (dolist (child children) + (when pretty (print-spaces (* 2 level) stream)) + (if (stringp child) + (print-string-xml child stream) + (print-xml-dom child input-type stream pretty (1+ level)))) + (when pretty (print-spaces (* 2 (1- level)) stream)))) + (print-closing-tag tag stream)) + (write-string "/>" stream))))) + (t (error "Input not recognized as LXML ~s" dom)))) + +;;;; eof
\ No newline at end of file diff --git a/third-party/s-xml/src/package.lisp b/third-party/s-xml/src/package.lisp new file mode 100644 index 0000000..1fc0cca --- /dev/null +++ b/third-party/s-xml/src/package.lisp @@ -0,0 +1,46 @@ +;;;; -*- mode: lisp -*- +;;;; +;;;; $Id: package.lisp,v 1.8 2006-01-31 11:44:15 scaekenberghe Exp $ +;;;; +;;;; This is a Common Lisp implementation of a very basic XML parser. +;;;; The parser is non-validating. +;;;; The API into the parser is pure functional parser hook model that comes from SSAX, +;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net +;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one. +;;;; +;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA. +;;;; +;;;; You are granted the rights to distribute and use this software +;;;; as governed by the terms of the Lisp Lesser General Public License +;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. + +(defpackage s-xml + (:use common-lisp) + (:export + ;; main parser interface + #:start-parse-xml + #:print-string-xml + #:xml-parser-error #:xml-parser-error-message #:xml-parser-error-args #:xml-parser-error-stream + #:xml-parser-state #:get-entities #:get-seed + #:get-new-element-hook #:get-finish-element-hook #:get-text-hook + ;; callbacks + #:*attribute-name-parser* + #:*attribute-value-parser* + #:parse-attribute-name + #:parse-attribute-value + ;; dom parser and printer + #:parse-xml-dom #:parse-xml #:parse-xml-string #:parse-xml-file + #:print-xml-dom #:print-xml #:print-xml-string + ;; xml-element structure + #:make-xml-element #:xml-element-children #:xml-element-name + #:xml-element-attribute #:xml-element-attributes + #:xml-element-p #:new-xml-element #:first-xml-element-child + ;; namespaces + #:*ignore-namespaces* #:*local-namespace* #:*namespaces* + #:*require-existing-symbols* #:*auto-export-symbols* #:*auto-create-namespace-packages* + #:find-namespace #:register-namespace #:get-prefix #:get-uri #:get-package + #:resolve-identifier #:extend-namespaces #:print-identifier #:split-identifier) + (:documentation + "A simple XML parser with an efficient, purely functional, event-based interface as well as a DOM interface")) + +;;;; eof diff --git a/third-party/s-xml/src/sxml-dom.lisp b/third-party/s-xml/src/sxml-dom.lisp new file mode 100644 index 0000000..dee3de8 --- /dev/null +++ b/third-party/s-xml/src/sxml-dom.lisp @@ -0,0 +1,76 @@ +;;;; -*- mode: lisp -*- +;;;; +;;;; $Id: sxml-dom.lisp,v 1.5 2005-11-20 14:34:15 scaekenberghe Exp $ +;;;; +;;;; LXML implementation of the generic DOM parser and printer. +;;;; +;;;; Copyright (C) 2003, 2004 Sven Van Caekenberghe, Beta Nine BVBA. +;;;; +;;;; You are granted the rights to distribute and use this software +;;;; as governed by the terms of the Lisp Lesser General Public License +;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. + +(in-package :s-xml) + +;;; the sxml hooks to generate sxml + +(defun sxml-new-element-hook (name attributes seed) + (declare (ignore name attributes seed)) + '()) + +(defun sxml-finish-element-hook (name attributes parent-seed seed) + (let ((xml-element (append (list name) + (when attributes + (list (let (list) + (dolist (attribute attributes (cons :@ list)) + (push (list (car attribute) (cdr attribute)) list))))) + (nreverse seed)))) + (cons xml-element parent-seed))) + +(defun sxml-text-hook (string seed) + (cons string seed)) + +;;; the standard DOM interfaces + +(defmethod parse-xml-dom (stream (output-type (eql :sxml))) + (car (start-parse-xml stream + (make-instance 'xml-parser-state + :new-element-hook #'sxml-new-element-hook + :finish-element-hook #'sxml-finish-element-hook + :text-hook #'sxml-text-hook)))) + +(defmethod print-xml-dom (dom (input-type (eql :sxml)) stream pretty level) + (declare (special *namespaces*)) + (cond ((stringp dom) (print-string-xml dom stream)) + ((consp dom) + (let ((tag (first dom)) + attributes + children) + (if (and (consp (second dom)) (eq (first (second dom)) :@)) + (setf attributes (rest (second dom)) + children (rest (rest dom))) + (setf children (rest dom))) + (let ((*namespaces* (extend-namespaces (loop :for (name value) :in attributes + :collect (cons name value)) + *namespaces*))) + (write-char #\< stream) + (print-identifier tag stream) + (loop :for (name value) :in attributes + :do (print-attribute name value stream)) + (if children + (progn + (write-char #\> stream) + (if (and (= (length children) 1) (stringp (first children))) + (print-string-xml (first children) stream) + (progn + (dolist (child children) + (when pretty (print-spaces (* 2 level) stream)) + (if (stringp child) + (print-string-xml child stream) + (print-xml-dom child input-type stream pretty (1+ level)))) + (when pretty (print-spaces (* 2 (1- level)) stream)))) + (print-closing-tag tag stream)) + (write-string "/>" stream))))) + (t (error "Input not recognized as SXML ~s" dom)))) + +;;;; eof diff --git a/third-party/s-xml/src/xml-struct-dom.lisp b/third-party/s-xml/src/xml-struct-dom.lisp new file mode 100644 index 0000000..916f747 --- /dev/null +++ b/third-party/s-xml/src/xml-struct-dom.lisp @@ -0,0 +1,125 @@ +;;;; -*- mode: lisp -*- +;;;; +;;;; $Id: xml-struct-dom.lisp,v 1.3 2005-09-20 09:57:48 scaekenberghe Exp $ +;;;; +;;;; XML-STRUCT implementation of the generic DOM parser and printer. +;;;; +;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA. +;;;; +;;;; You are granted the rights to distribute and use this software +;;;; as governed by the terms of the Lisp Lesser General Public License +;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. + +(in-package :s-xml) + +;;; xml-element struct datastructure and API + +(defstruct xml-element + name ; :tag-name + attributes ; a assoc list of (:attribute-name . "attribute-value") + children ; a list of children/content either text strings or xml-elements + ) + +(setf (documentation 'xml-element-p 'function) + "Return T when the argument is an xml-element struct" + (documentation 'xml-element-attributes 'function) + "Return the alist of attribute names and values dotted pairs from an xml-element struct" + (documentation 'xml-element-children 'function) + "Return the list of children from an xml-element struct" + (documentation 'xml-element-name 'function) + "Return the name from an xml-element struct" + (documentation 'make-xml-element 'function) + "Make and return a new xml-element struct") + +(defun xml-element-attribute (xml-element key) + "Return the string value of the attribute with name the keyword :key + of xml-element if any, return null if not found" + (let ((pair (assoc key (xml-element-attributes xml-element) :test #'eq))) + (when pair (cdr pair)))) + +(defun (setf xml-element-attribute) (value xml-element key) + "Set the string value of the attribute with name the keyword :key of + xml-element, creating a new attribute if necessary or overwriting an + existing one, returning the value" + (let ((attributes (xml-element-attributes xml-element))) + (if (null attributes) + (push (cons key value) (xml-element-attributes xml-element)) + (let ((pair (assoc key attributes :test #'eq))) + (if pair + (setf (cdr pair) value) + (push (cons key value) (xml-element-attributes xml-element))))) + value)) + +(defun new-xml-element (name &rest children) + "Make a new xml-element with name and children" + (make-xml-element :name name :children children)) + +(defun first-xml-element-child (xml-element) + "Get the first child of an xml-element" + (first (xml-element-children xml-element))) + +(defun xml-equal (xml-1 xml-2) + (and (xml-element-p xml-1) + (xml-element-p xml-2) + (eq (xml-element-name xml-1) + (xml-element-name xml-2)) + (equal (xml-element-attributes xml-1) + (xml-element-attributes xml-2)) + (reduce #'(lambda (&optional (x t) (y t)) (and x y)) + (mapcar #'(lambda (x y) + (or (and (stringp x) (stringp y) (string= x y)) + (xml-equal x y))) + (xml-element-children xml-1) + (xml-element-children xml-2))))) + +;;; printing xml structures + +(defmethod print-xml-dom (xml-element (input-type (eql :xml-struct)) stream pretty level) + (declare (special *namespaces*)) + (let ((*namespaces* (extend-namespaces (xml-element-attributes xml-element) + *namespaces*))) + (write-char #\< stream) + (print-identifier (xml-element-name xml-element) stream) + (loop :for (name . value) :in (xml-element-attributes xml-element) + :do (print-attribute name value stream)) + (let ((children (xml-element-children xml-element))) + (if children + (progn + (write-char #\> stream) + (if (and (= (length children) 1) (stringp (first children))) + (print-string-xml (first children) stream) + (progn + (dolist (child children) + (when pretty (print-spaces (* 2 level) stream)) + (if (stringp child) + (print-string-xml child stream) + (print-xml-dom child input-type stream pretty (1+ level)))) + (when pretty (print-spaces (* 2 (1- level)) stream)))) + (print-closing-tag (xml-element-name xml-element) stream)) + (write-string "/>" stream))))) + +;;; the standard hooks to generate xml-element structs + +(defun standard-new-element-hook (name attributes seed) + (declare (ignore name attributes seed)) + '()) + +(defun standard-finish-element-hook (name attributes parent-seed seed) + (let ((xml-element (make-xml-element :name name + :attributes attributes + :children (nreverse seed)))) + (cons xml-element parent-seed))) + +(defun standard-text-hook (string seed) + (cons string seed)) + +;;; top level standard parser interfaces + +(defmethod parse-xml-dom (stream (output-type (eql :xml-struct))) + (car (start-parse-xml stream + (make-instance 'xml-parser-state + :new-element-hook #'standard-new-element-hook + :finish-element-hook #'standard-finish-element-hook + :text-hook #'standard-text-hook)))) + +;;;; eof diff --git a/third-party/s-xml/src/xml.lisp b/third-party/s-xml/src/xml.lisp new file mode 100644 index 0000000..8df61c6 --- /dev/null +++ b/third-party/s-xml/src/xml.lisp @@ -0,0 +1,700 @@ +;;;; -*- mode: lisp -*- +;;;; +;;;; $Id: xml.lisp,v 1.16 2006-01-31 11:44:15 scaekenberghe Exp $ +;;;; +;;;; This is a Common Lisp implementation of a basic but usable XML parser. +;;;; The parser is non-validating and not complete (no PI). +;;;; Namespace and entities are handled. +;;;; The API into the parser is a pure functional parser hook model that comes from SSAX, +;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net +;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one. +;;;; +;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA. +;;;; +;;;; You are granted the rights to distribute and use this software +;;;; as governed by the terms of the Lisp Lesser General Public License +;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. + +(in-package :s-xml) + +;;; error reporting + +(define-condition xml-parser-error (error) + ((message :initarg :message :reader xml-parser-error-message) + (args :initarg :args :reader xml-parser-error-args) + (stream :initarg :stream :reader xml-parser-error-stream :initform nil)) + (:report (lambda (condition stream) + (format stream + "XML parser ~?~@[ near stream position ~d~]." + (xml-parser-error-message condition) + (xml-parser-error-args condition) + (and (xml-parser-error-stream condition) + (file-position (xml-parser-error-stream condition)))))) + (:documentation "Thrown by the XML parser to indicate errorneous input")) + +(setf (documentation 'xml-parser-error-message 'function) + "Get the message from an XML parser error" + (documentation 'xml-parser-error-args 'function) + "Get the error arguments from an XML parser error" + (documentation 'xml-parser-error-stream 'function) + "Get the stream from an XML parser error") + +(defun parser-error (message &optional args stream) + (make-condition 'xml-parser-error + :message message + :args args + :stream stream)) + +;; attribute parsing hooks +;; this is a bit complicated, refer to the mailing lists for a more detailed explanation + +(defun parse-attribute-name (string) + "Default parser for the attribute name" + (declare (special *namespaces*)) + (resolve-identifier string *namespaces* t)) + +(defun parse-attribute-value (name string) + "Default parser for the attribute value" + (declare (ignore name) + (special *ignore-namespace*)) + (if *ignore-namespaces* + (copy-seq string) + string)) + +(defparameter *attribute-name-parser* #'parse-attribute-name + "Called to compute interned attribute name from a buffer that will be reused") + +(defparameter *attribute-value-parser* #'parse-attribute-value + "Called to compute an element of an attribute list from a buffer that will be reused") + +;;; utilities + +(defun whitespace-char-p (char) + "Is char an XML whitespace character ?" + (declare (type character char)) + (or (char= char #\space) + (char= char #\tab) + (char= char #\return) + (char= char #\linefeed))) + +(defun identifier-char-p (char) + "Is char an XML identifier character ?" + (declare (type character char)) + (or (and (char<= #\A char) (char<= char #\Z)) + (and (char<= #\a char) (char<= char #\z)) + (and (char<= #\0 char) (char<= char #\9)) + (char= char #\-) + (char= char #\_) + (char= char #\.) + (char= char #\:))) + +(defun skip-whitespace (stream) + "Skip over XML whitespace in stream, return first non-whitespace + character which was peeked but not read, return nil on eof" + (loop + (let ((char (peek-char nil stream nil #\Null))) + (declare (type character char)) + (if (whitespace-char-p char) + (read-char stream) + (return char))))) + +(defun make-extendable-string (&optional (size 10)) + "Make an extendable string which is a one-dimensional character + array which is adjustable and has a fill pointer" + (make-array size + :element-type 'character + :adjustable t + :fill-pointer 0)) + +(defun print-string-xml (string stream &key (start 0) end) + "Write the characters of string to stream using basic XML conventions" + (loop for offset upfrom start below (or end (length string)) + for char = (char string offset) + do (case char + (#\& (write-string "&" stream)) + (#\< (write-string "<" stream)) + (#\> (write-string ">" stream)) + (#\" (write-string """ stream)) + ((#\newline #\return #\tab) (write-char char stream)) + (t (if (and (<= 32 (char-code char)) + (<= (char-code char) 126)) + (write-char char stream) + (progn + (write-string "&#x" stream) + (write (char-code char) :stream stream :base 16) + (write-char #\; stream))))))) + +(defun make-standard-entities () + "A hashtable mapping XML entity names to their replacement strings, + filled with the standard set" + (let ((entities (make-hash-table :test #'equal))) + (setf (gethash "amp" entities) (string #\&) + (gethash "quot" entities) (string #\") + (gethash "apos" entities) (string #\') + (gethash "lt" entities) (string #\<) + (gethash "gt" entities) (string #\>) + (gethash "nbsp" entities) (string #\space)) + entities)) + +(defun resolve-entity (stream extendable-string entities entity) + "Read and resolve an XML entity from stream, positioned after the '&' entity marker, + accepting &name; &#DEC; and &#xHEX; formats, + destructively modifying string, which is also returned, + destructively modifying entity, incorrect entity formats result in errors" + (declare (type (vector character) entity)) + (loop + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + (cond ((char= char #\Null) (error (parser-error "encountered eof before end of entity"))) + ((char= #\; char) (return)) + (t (vector-push-extend char entity))))) + (if (char= (char entity 0) #\#) + (let ((code (if (char= (char entity 1) #\x) + (parse-integer entity :start 2 :radix 16 :junk-allowed t) + (parse-integer entity :start 1 :radix 10 :junk-allowed t)))) + (when (null code) + (error (parser-error "encountered incorrect entity &~s;" (list entity) stream))) + (vector-push-extend (code-char code) extendable-string)) + (let ((value (gethash entity entities))) + (if value + (loop :for char :across value + :do (vector-push-extend char extendable-string)) + (error (parser-error "encountered unknown entity &~s;" (list entity) stream))))) + extendable-string) + +;;; namespace support + +(defvar *ignore-namespaces* nil + "When t, namespaces are ignored like in the old version of S-XML") + +(defclass xml-namespace () + ((uri :documentation "The URI used to identify this namespace" + :accessor get-uri + :initarg :uri) + (prefix :documentation "The preferred prefix assigned to this namespace" + :accessor get-prefix + :initarg :prefix + :initform nil) + (package :documentation "The Common Lisp package where this namespace's symbols are interned" + :accessor get-package + :initarg :package + :initform nil)) + (:documentation "Describes an XML namespace and how it is handled")) + +(setf (documentation 'get-uri 'function) + "The URI used to identify this namespace" + (documentation 'get-prefix 'function) + "The preferred prefix assigned to this namespace" + (documentation 'get-package 'function) + "The Common Lisp package where this namespace's symbols are interned") + +(defmethod print-object ((object xml-namespace) stream) + (print-unreadable-object (object stream :type t :identity t) + (format stream "~A - ~A" (get-prefix object) (get-uri object)))) + +(defvar *local-namespace* (make-instance 'xml-namespace + :uri "local" + :prefix "" + :package (find-package :keyword)) + "The local (global default) XML namespace") + +(defvar *xml-namespace* (make-instance 'xml-namespace + :uri "http://www.w3.org/XML/1998/namespace" + :prefix "xml" + :package (or (find-package :xml) + (make-package :xml :nicknames '("XML")))) + "REC-xml-names-19990114 says the prefix xml is bound to the namespace http://www.w3.org/XML/1998/namespace.") + +(defvar *known-namespaces* (list *local-namespace* *xml-namespace*) + "The list of known/defined namespaces") + +(defvar *namespaces* `(("xml" . ,*xml-namespace*) ("" . ,*local-namespace*)) + "Ordered list of (prefix . XML-namespace) bindings currently in effect - special variable") + +(defun find-namespace (uri) + "Find a registered XML namespace identified by uri" + (find uri *known-namespaces* :key #'get-uri :test #'string-equal)) + +(defun register-namespace (uri prefix package) + "Register a new or redefine an existing XML namespace defined by uri with prefix and package" + (let ((namespace (find-namespace uri))) + (if namespace + (setf (get-prefix namespace) prefix + (get-package namespace) (find-package package)) + (push (setf namespace (make-instance 'xml-namespace + :uri uri + :prefix prefix + :package (find-package package))) + *known-namespaces*)) + namespace)) + +(defun find-namespace-binding (prefix namespaces) + "Find the XML namespace currently bound to prefix in the namespaces bindings" + (cdr (assoc prefix namespaces :test #'string-equal))) + +(defun split-identifier (identifier) + "Split an identifier 'prefix:name' and return (values prefix name)" + (when (symbolp identifier) + (setf identifier (symbol-name identifier))) + (let ((colon-position (position #\: identifier :test #'char=))) + (if colon-position + (values (subseq identifier 0 colon-position) + (subseq identifier (1+ colon-position))) + (values nil identifier)))) + +(defvar *require-existing-symbols* nil + "If t, each XML identifier must exist as symbol already") + +(defvar *auto-export-symbols* t + "If t, export newly interned symbols form their packages") + +(defun resolve-identifier (identifier namespaces &optional as-attribute) + "Resolve the string identifier in the list of namespace bindings" + (if *ignore-namespaces* + (intern identifier :keyword) + (flet ((intern-symbol (string package) ; intern string as a symbol in package + (if *require-existing-symbols* + (let ((symbol (find-symbol string package))) + (or symbol + (error "Symbol ~s does not exist in ~s" string package))) + (let ((symbol (intern string package))) + (when (and *auto-export-symbols* + (not (eql package (find-package :keyword)))) + (export symbol package)) + symbol)))) + (multiple-value-bind (prefix name) + (split-identifier identifier) + (if (or (null prefix) (string= prefix "xmlns")) + (if as-attribute + (intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*)) + (let ((default-namespace (find-namespace-binding "" namespaces))) + (intern-symbol name (get-package default-namespace)))) + (let ((namespace (find-namespace-binding prefix namespaces))) + (if namespace + (intern-symbol name (get-package namespace)) + (error "namespace not found for prefix ~s" prefix)))))))) + +(defvar *auto-create-namespace-packages* t + "If t, new packages will be created for namespaces, if needed, named by the prefix") + +(defun new-namespace (uri &optional prefix) + "Register a new namespace for uri and prefix, creating a package if necessary" + (if prefix + (register-namespace uri + prefix + (or (find-package prefix) + (if *auto-create-namespace-packages* + (make-package prefix :nicknames `(,(string-upcase prefix))) + (error "Cannot find or create package ~s" prefix)))) + (let ((unique-name (loop :for i :upfrom 0 + :do (let ((name (format nil "ns-~d" i))) + (when (not (find-package name)) + (return name)))))) + (register-namespace uri + unique-name + (if *auto-create-namespace-packages* + (make-package (string-upcase unique-name) :nicknames `(,unique-name)) + (error "Cannot create package ~s" unique-name)))))) + +(defun extend-namespaces (attributes namespaces) + "Given possible 'xmlns[:prefix]' attributes, extend the namespaces bindings" + (unless *ignore-namespaces* + (let (default-namespace-uri) + (loop :for (key . value) :in attributes + :do (if (string= key "xmlns") + (setf default-namespace-uri value) + (multiple-value-bind (prefix name) + (split-identifier key) + (when (string= prefix "xmlns") + (let* ((uri value) + (prefix name) + (namespace (find-namespace uri))) + (unless namespace + (setf namespace (new-namespace uri prefix))) + (push `(,prefix . ,namespace) namespaces)))))) + (when default-namespace-uri + (let ((namespace (find-namespace default-namespace-uri))) + (unless namespace + (setf namespace (new-namespace default-namespace-uri))) + (push `("" . ,namespace) namespaces))))) + namespaces) + +(defun print-identifier (identifier stream &optional as-attribute) + "Print identifier on stream using namespace conventions" + (declare (ignore as-attribute) (special *namespaces*)) + (if *ignore-namespaces* + (princ identifier stream) + (if (symbolp identifier) + (let ((package (symbol-package identifier)) + (name (symbol-name identifier))) + (let* ((namespace (find package *known-namespaces* :key #'get-package)) + (prefix (or (car (find namespace *namespaces* :key #'cdr)) + (get-prefix namespace)))) + (if (string= prefix "") + (princ name stream) + (format stream "~a:~a" prefix name)))) + (princ identifier stream)))) + +;;; the parser state + +(defclass xml-parser-state () + ((entities :documentation "A hashtable mapping XML entity names to their replacement stings" + :accessor get-entities + :initarg :entities + :initform (make-standard-entities)) + (seed :documentation "The user seed object" + :accessor get-seed + :initarg :seed + :initform nil) + (buffer :documentation "The main reusable character buffer" + :accessor get-buffer + :initform (make-extendable-string)) + (mini-buffer :documentation "The secondary, smaller reusable character buffer" + :accessor get-mini-buffer + :initform (make-extendable-string)) + (new-element-hook :documentation "Called when new element starts" + ;; Handle the start of a new xml element with name and attributes, + ;; receiving seed from previous element (sibling or parent) + ;; return seed to be used for first child (content) + ;; or directly to finish-element-hook + :accessor get-new-element-hook + :initarg :new-element-hook + :initform #'(lambda (name attributes seed) + (declare (ignore name attributes)) + seed)) + (finish-element-hook :documentation "Called when element ends" + ;; Handle the end of an xml element with name and attributes, + ;; receiving parent-seed, the seed passed to us when this element started, + ;; i.e. passed to our corresponding new-element-hook + ;; and receiving seed from last child (content) + ;; or directly from new-element-hook + ;; return final seed for this element to next element (sibling or parent) + :accessor get-finish-element-hook + :initarg :finish-element-hook + :initform #'(lambda (name attributes parent-seed seed) + (declare (ignore name attributes parent-seed)) + seed)) + (text-hook :documentation "Called when text is found" + ;; Handle text in string, found as contents, + ;; receiving seed from previous element (sibling or parent), + ;; return final seed for this element to next element (sibling or parent) + :accessor get-text-hook + :initarg :text-hook + :initform #'(lambda (string seed) + (declare (ignore string)) + seed))) + (:documentation "The XML parser state passed along all code making up the parser")) + +(setf (documentation 'get-seed 'function) + "Get the initial user seed of an XML parser state" + (documentation 'get-entities 'function) + "Get the entities hashtable of an XML parser state" + (documentation 'get-new-element-hook 'function) + "Get the new element hook of an XML parser state" + (documentation 'get-finish-element-hook 'function) + "Get the finish element hook of an XML parser state" + (documentation 'get-text-hook 'function) + "Get the text hook of an XML parser state") + +#-allegro +(setf (documentation '(setf get-seed) 'function) + "Set the initial user seed of an XML parser state" + (documentation '(setf get-entities) 'function) + "Set the entities hashtable of an XML parser state" + (documentation '(setf get-new-element-hook) 'function) + "Set the new element hook of an XML parser state" + (documentation '(setf get-finish-element-hook) 'function) + "Set the finish element hook of an XML parser state" + (documentation '(setf get-text-hook) 'function) + "Set the text hook of an XML parser state") + +(defmethod get-mini-buffer :after ((state xml-parser-state)) + "Reset and return the reusable mini buffer" + (with-slots (mini-buffer) state + (setf (fill-pointer mini-buffer) 0))) + +(defmethod get-buffer :after ((state xml-parser-state)) + "Reset and return the main reusable buffer" + (with-slots (buffer) state + (setf (fill-pointer buffer) 0))) + +;;; parser support + +(defun parse-whitespace (stream extendable-string) + "Read and collect XML whitespace from stream in string which is + destructively modified, return first non-whitespace character which + was peeked but not read, return #\Null on eof" + (declare (type (vector character) extendable-string)) + (loop + (let ((char (peek-char nil stream nil #\Null))) + (declare (type character char)) + (if (whitespace-char-p char) + (vector-push-extend (read-char stream) extendable-string) + (return char))))) + +(defun parse-string (stream state string) + "Read and return an XML string from stream, delimited by either + single or double quotes, the stream is expected to be on the opening + delimiter, at the end the closing delimiter is also read, entities + are resolved, eof before end of string is an error" + (declare (type (vector character) string)) + (let ((delimiter (read-char stream nil #\Null)) + (char #\Null)) + (declare (type character delimiter char)) + (unless (or (char= delimiter #\') (char= delimiter #\")) + (error (parser-error "expected string delimiter" nil stream))) + (loop + (setf char (read-char stream nil #\Null)) + (cond ((char= char #\Null) (error (parser-error "encountered eof before end of string"))) + ((char= char delimiter) (return)) + ((char= char #\&) (resolve-entity stream string (get-entities state) (get-mini-buffer state))) + (t (vector-push-extend char string)))) + string)) + +(defun parse-text (stream state extendable-string) + "Read and collect XML text from stream in string which is + destructively modified, the text ends with a '<', which is peeked and + returned, entities are resolved, eof is considered an error" + (declare (type (vector character) extendable-string)) + (let ((char #\Null)) + (declare (type character char)) + (loop + (setf char (peek-char nil stream nil #\Null)) + (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text"))) + (when (char= char #\<) (return)) + (read-char stream) + (if (char= char #\&) + (resolve-entity stream extendable-string (get-entities state) (get-mini-buffer state)) + (vector-push-extend char extendable-string))) + char)) + +(defun parse-identifier (stream identifier) + "Read and returns an XML identifier from stream, positioned at the + start of the identifier, ending with the first non-identifier + character, which is peeked, the identifier is written destructively + into identifier which is also returned" + (declare (type (vector character) identifier)) + (loop + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + (cond ((identifier-char-p char) + (vector-push-extend char identifier)) + (t + (when (char/= char #\Null) (unread-char char stream)) + (return identifier)))))) + +(defun skip-comment (stream) + "Skip an XML comment in stream, positioned after the opening '<!--', + consumes the closing '-->' sequence, unexpected eof or a malformed + closing sequence result in a error" + (let ((dashes-to-read 2)) + (loop + (if (zerop dashes-to-read) (return)) + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + (if (char= char #\Null) + (error (parser-error "encountered unexpected eof for comment"))) + (if (char= char #\-) + (decf dashes-to-read) + (setf dashes-to-read 2))))) + (if (char/= (read-char stream nil #\Null) #\>) + (error (parser-error "expected > ending comment" nil stream)))) + +(defun read-cdata (stream state string) + "Reads in the CDATA and calls the callback for CDATA if it exists" + ;; we already read the <![CDATA[ stuff + ;; continue to read until we hit ]]> + (let ((char #\space) + (last-3-characters (list #\[ #\A #\T)) + (pattern (list #\> #\] #\]))) + (declare (type character char)) + (loop + (setf char (read-char stream nil #\Null)) + (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text"))) + (push char last-3-characters) + (setf (cdddr last-3-characters) nil) + (cond + ((equal last-3-characters + pattern) + (setf (fill-pointer string) + (- (fill-pointer string) 2)) + (setf (get-seed state) + (funcall (get-text-hook state) + (copy-seq string) + (get-seed state))) + (return-from read-cdata)) + (t + (vector-push-extend char string)))))) + +(defun skip-special-tag (stream state) + "Skip an XML special tag (comments and processing instructions) in + stream, positioned after the opening '<', unexpected eof is an error" + ;; opening < has been read, consume ? or ! + (read-char stream) + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + ;; see if we are dealing with a comment + (when (char= char #\-) + (setf char (read-char stream nil #\Null)) + (when (char= char #\-) + (skip-comment stream) + (return-from skip-special-tag))) + ;; maybe we are dealing with CDATA? + (when (and (char= char #\[) + (loop :for pattern :across "CDATA[" + :for char = (read-char stream nil #\Null) + :when (char= char #\Null) :do + (error (parser-error "encountered unexpected eof in cdata")) + :always (char= char pattern))) + (read-cdata stream state (get-buffer state)) + (return-from skip-special-tag)) + ;; loop over chars, dealing with strings (skipping their content) + ;; and counting opening and closing < and > chars + (let ((taglevel 1) + (string-delimiter #\Null)) + (declare (type character string-delimiter)) + (loop + (when (zerop taglevel) (return)) + (setf char (read-char stream nil #\Null)) + (when (char= char #\Null) + (error (parser-error "encountered unexpected eof for special (! or ?) tag" nil stream))) + (if (char/= string-delimiter #\Null) + ;; inside a string we only look for a closing string delimiter + (when (char= char string-delimiter) + (setf string-delimiter #\Null)) + ;; outside a string we count < and > and watch out for strings + (cond ((or (char= char #\') (char= char #\")) (setf string-delimiter char)) + ((char= char #\<) (incf taglevel)) + ((char= char #\>) (decf taglevel)))))))) + +;;; the XML parser proper + +(defun parse-xml-element-attributes (stream state) + "Parse XML element attributes from stream positioned after the tag + identifier, returning the attributes as an assoc list, ending at + either a '>' or a '/' which is peeked and also returned" + (declare (special *namespaces*)) + (let ((char #\Null) attributes) + (declare (type character char)) + (loop + ;; skip whitespace separating items + (setf char (skip-whitespace stream)) + ;; start tag attributes ends with > or /> + (when (or (char= char #\>) (char= char #\/)) (return)) + ;; read the attribute key + (let ((key (let ((string (parse-identifier stream (get-mini-buffer state)))) + (if *ignore-namespaces* + (funcall *attribute-name-parser* string) + (copy-seq string))))) + ;; skip separating whitespace + (setf char (skip-whitespace stream)) + ;; require = sign (and consume it if present) + (if (char= char #\=) + (read-char stream) + (error (parser-error "expected =" nil stream))) + ;; skip separating whitespace + (skip-whitespace stream) + ;; read the attribute value as a string + (push (cons key (let ((string (parse-string stream state (get-buffer state)))) + (if *ignore-namespaces* + (funcall *attribute-value-parser* key string) + (copy-seq string)))) + attributes))) + ;; return attributes peek char ending loop + (values attributes char))) + +(defun parse-xml-element (stream state) + "Parse and return an XML element from stream, positioned after the opening '<'" + (declare (special *namespaces*)) + ;; opening < has been read + (when (char= (peek-char nil stream nil #\Null) #\!) + (skip-special-tag stream state) + (return-from parse-xml-element)) + (let ((char #\Null) buffer open-tag parent-seed has-children) + (declare (type character char)) + (setf parent-seed (get-seed state)) + ;; read tag name (no whitespace between < and name ?) + (setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state)))) + ;; tag has been read, read attributes if any + (multiple-value-bind (attributes peeked-char) + (parse-xml-element-attributes stream state) + (let ((*namespaces* (extend-namespaces attributes *namespaces*))) + (setf open-tag (resolve-identifier open-tag *namespaces*)) + (unless *ignore-namespaces* + (dolist (attribute attributes) + (setf (car attribute) (funcall *attribute-name-parser* (car attribute)) + (cdr attribute) (funcall *attribute-value-parser* (car attribute) (cdr attribute))))) + (setf (get-seed state) (funcall (get-new-element-hook state) + open-tag attributes (get-seed state))) + (setf char peeked-char) + (when (char= char #\/) + ;; handle solitary tag of the form <tag .. /> + (read-char stream) + (setf char (read-char stream nil #\Null)) + (if (char= #\> char) + (progn + (setf (get-seed state) (funcall (get-finish-element-hook state) + open-tag attributes parent-seed (get-seed state))) + (return-from parse-xml-element)) + (error (parser-error "expected >" nil stream)))) + ;; consume > + (read-char stream) + (loop + (setf buffer (get-buffer state)) + ;; read whitespace into buffer + (setf char (parse-whitespace stream buffer)) + ;; see what ended the whitespace scan + (cond ((char= char #\Null) (error (parser-error "encountered unexpected eof handling ~a" + (list open-tag)))) + ((char= char #\<) + ;; consume the < + (read-char stream) + (if (char= (peek-char nil stream nil #\Null) #\/) + (progn + ;; handle the matching closing tag </tag> and done + ;; if we read whitespace as this (leaf) element's contents, it is significant + (when (and (not has-children) (plusp (length buffer))) + (setf (get-seed state) (funcall (get-text-hook state) + (copy-seq buffer) (get-seed state)))) + (read-char stream) + (let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state)) + *namespaces*))) + (unless (eq open-tag close-tag) + (error (parser-error "found <~a> not matched by </~a> but by <~a>" + (list open-tag open-tag close-tag) stream))) + (unless (char= (read-char stream nil #\Null) #\>) + (error (parser-error "expected >" nil stream))) + (setf (get-seed state) (funcall (get-finish-element-hook state) + open-tag attributes parent-seed (get-seed state)))) + (return)) + ;; handle child tag and loop, no hooks to call here + ;; whitespace between child elements is skipped + (progn + (setf has-children t) + (parse-xml-element stream state)))) + (t + ;; no child tag, concatenate text to whitespace in buffer + ;; handle text content and loop + (setf char (parse-text stream state buffer)) + (setf (get-seed state) (funcall (get-text-hook state) + (copy-seq buffer) (get-seed state)))))))))) + +(defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state))) + "Parse and return a toplevel XML element from stream, using parser state" + (loop + (let ((char (skip-whitespace stream))) + (when (char= char #\Null) (return-from start-parse-xml)) + ;; skip whitespace until start tag + (unless (char= char #\<) + (error (parser-error "expected <" nil stream))) + (read-char stream) ; consume peeked char + (setf char (peek-char nil stream nil #\Null)) + (if (or (char= char #\!) (char= char #\?)) + ;; deal with special tags + (skip-special-tag stream state) + (progn + ;; read the main element + (parse-xml-element stream state) + (return-from start-parse-xml (get-seed state))))))) + +;;;; eof |