//
// Sample program that displays an XML file in a GtkTextView.
//
// Copyright (C) 2008 Emmanuel Rodriguez
//
// This program is free software; you can redistribute it and/or modify it under
// the same terms as Perl itself, either Perl version 5.8.8 or, at your option,
// any later version of Perl 5 you may have available.
//
//
#include "code.h"
#include "logger.h"
#include "libxml.h"
#include <string.h>
// Adds 'text' to the render context with the given tag. No XML node is
// associated with the chunk (my_buffer_add() receives NULL as its node).
#define buffer_add(xargs, tag, text) my_buffer_add(xargs, tag, NULL, text)
// Same as buffer_add() but also forwards 'node' to my_buffer_add().
#define buffer_add_node(xargs, tag, node, text) my_buffer_add(xargs, tag, node, text)
// Concatenates the given strings and adds the result to the render context as
// a single chunk with the given tag. The strings are passed to g_strconcat(),
// the terminating NULL is supplied by the macro. Beware that a NULL string in
// the list ends the concatenation early.
//
// The temporary has a macro specific name so that an argument expression which
// mentions a caller's variable named 'content' isn't captured by the macro.
#define buffer_cat(xargs, tag, ...) \
    do { \
        gchar *buffer_cat_joined_ = g_strconcat(__VA_ARGS__, NULL); \
        my_buffer_add((xargs), (tag), NULL, buffer_cat_joined_); \
        g_free(buffer_cat_joined_); \
    } while (0)
// True when node 'a' is an element that has the same name and the same
// namespace pointer as node 'b'. The whole expansion is parenthesized so that
// the macro can be negated or combined with other operators safely.
#define ELEMENT_MATCH(a, b) ( \
    (a)->type == XML_ELEMENT_NODE \
    && xmlStrEqual((a)->name, (b)->name) \
    && (a)->ns == (b)->ns \
)
// The icon identifier stored in the DOM_COL_ICON column of every element row
// added by my_populate_tree_store().
#define ICON_ELEMENT "gtk-directory"
// The markup styles to be used. Each field holds whatever
// gtk_text_tag_table_lookup() returned for the tag of the same name (see
// my_get_buffer_tags()).
typedef struct _MarkupTags {
// Margin counter used to identify each XPath result
GtkTextTag *result_count;
// Boolean result from an XPath expression
GtkTextTag *boolean;
// Numerical result from an XPath expression
GtkTextTag *number;
// The name of an attribute
GtkTextTag *attribute_name;
// The value of an attribute
GtkTextTag *attribute_value;
// An XML comment
GtkTextTag *comment;
// A DTD definition
GtkTextTag *dtd;
// An XML element (both opening and closing tag)
GtkTextTag *element;
// The name of a processing instruction
GtkTextTag *pi;
// The data of a processing instruction
GtkTextTag *pi_data;
// Syntax tokens : <, >, &, ;, etc.
GtkTextTag *syntax;
// Literal result (string) from an XPath expression
GtkTextTag *literal;
// A CDATA (both opening and closing syntax)
GtkTextTag *cdata;
// The content of a CDATA
GtkTextTag *cdata_content;
// The name (prefix) of a namespace declaration
GtkTextTag *namespace_name;
// The URI of a namespace declaration
GtkTextTag *namespace_uri;
// An entity reference
GtkTextTag *entity_ref;
// Used by my_display_document_syntax() when it's given a NULL node
GtkTextTag *error;
} MarkupTags;
// The context used for displaying the XML. Since a lot of functions need these
// parameters, it's easier to group them in a custom struct and pass that struct
// around.
typedef struct _TextRenderCtx {
// The GTK text buffer on which to perform the rendering
GtkTextBuffer *buffer;
// The markup tags defined in the text buffer
MarkupTags *markup;
// Perl hash with the namespaces to use (key: uri, value: prefix)
HV *namespaces;
// Contents of the XML document (it gets built at runtime). The text is
// accumulated here by my_buffer_add() and inserted into the buffer in a single
// operation by my_render_buffer().
GString *xml_data;
// Current position in the XML document. It counts the characters (not the
// bytes) accumulated. The counter starts at the offset of the end of the
// buffer, this way it also accounts for the characters already present in the
// buffer. Its purpose is to provide the position where to apply the text tags
// (syntax highlighting styles).
guint buffer_pos;
// The tags to apply (collected at runtime as the XML document gets built).
// This is an array of ApplyTag.
GArray *tags;
// Statistics used for debugging purposes (number of calls to my_buffer_add())
gsize calls;
} TextRenderCtx;
//
// A text style to apply for the syntax highlighting of the XML. The instances
// are collected by my_buffer_add() and consumed by my_render_buffer().
//
typedef struct _ApplyTag {
// The tag (style) to apply
GtkTextTag *tag;
// Offset in the buffer (in characters) where the tag starts
gsize start;
// Offset in the buffer (in characters) where the tag ends
gsize end;
// Optional name (can be NULL). When set my_render_buffer() creates the marks
// "<name>|start" and "<name>|end" around the text and frees the string.
gchar *name;
} ApplyTag;
//
// The context used for populating the DOM tree. It is shared by all the
// recursive calls of my_populate_tree_store().
//
typedef struct _TreeRenderCtx {
// The GTK tree store to fill
GtkTreeStore *store;
// Perl hash with the namespaces to use (key: uri, value: prefix). It can be
// NULL, in that case no Perl scalar (SV) is created for the nodes.
HV *namespaces;
// ProxyNode used by XML::LibXML (passed to PmmNodeToSv() for each element)
ProxyNode *proxy;
// Statistics used for debugging purposes (number of elements visited)
gsize calls;
} TreeRenderCtx;
//
// Function prototypes
//
// Generic helpers (tag lookup, serialization, name resolution and buffering)
static MarkupTags* my_get_buffer_tags (GtkTextBuffer *buffer);
static gchar* my_to_string (xmlNode *node);
static void my_buffer_add (TextRenderCtx *xargs, GtkTextTag *tag, xmlNode *node, const gchar *text);
static void my_display_document_syntax (TextRenderCtx *xargs, xmlNode *node);
static gchar* my_get_node_name_prefixed (xmlNode *node, HV *namespaces);
static const gchar* my_get_uri_prefix (const xmlChar *uri, HV *namespaces);
static void my_render_buffer (TextRenderCtx *xargs);
static void my_add_text_and_entity (TextRenderCtx *xargs, GString *buffer, GtkTextTag *markup, const gchar *entity);
static void my_populate_tree_store (TreeRenderCtx *xargs, xmlNode *node, GtkTreeIter *parent, gint pos);
// Renderers, one per kind of XML node (dispatched by
// my_display_document_syntax() or called by another renderer)
static void my_XML_DOCUMENT_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_HTML_DOCUMENT_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_ELEMENT_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_ATTRIBUTE_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_ATTRIBUTE_VALUE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_TEXT_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_COMMENT_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_CDATA_SECTION_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_PI_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_ENTITY_REF_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_ENTITY_REF_VALUE (TextRenderCtx *xargs, const gchar *name);
static void my_XML_DTD_NODE (TextRenderCtx *xargs, xmlNode *node);
static void my_XML_NAMESPACE_DECL (TextRenderCtx *xargs, xmlNs *ns);
//
// This function displays a simplified version of the DOM tree of an XML node
// into a GtkTreeStore. The XML nodes are displayed with their corresponding
// namespace prefix. The prefixes to use are taken from the given Perl hash.
//
// At the moment the DOM shows only the XML Elements. All other nodes are not
// rendered. If an element defines an attribute that's an ID (with xml:id or
// through the DTD) then the ID will be displayed.
//
// The tree displayed is always the one of the root element of the document
// that owns 'node'. The store is cleared before being filled; it is cleared
// even if there's no node or no root element to display.
//
void xacobeo_populate_gtk_tree_store (GtkTreeStore *store, xmlNode *node, HV *namespaces) {

    // Parameters validation and initialization of the tree store
    if (store == NULL) {
        WARN("GtkTreeStore is NULL");
        return;
    }
    gtk_tree_store_clear(store);

    // Get the root element
    if (node == NULL) {
        WARN("XML node is NULL");
        return;
    }
    xmlNode *root = xmlDocGetRootElement(node->doc);
    if (root == NULL) {
        WARN("Document has no root element");
        return;
    }
    DEBUG("Adding root element %s", root->name);

    // The argument context
    TreeRenderCtx xargs = {
        .store = store,
        .namespaces = namespaces,
        .calls = 0,
        .proxy = PmmOWNERPO(PmmPROXYNODE(node)),
    };

    // Populate the DOM tree (timed)
    DEBUG("Populating DOM tree");
    GTimeVal start, end;
    g_get_current_time(&start);
    my_populate_tree_store(&xargs, root, NULL, 0);
    g_get_current_time(&end);

    // Calculate the number of micro seconds spent since the last time
    glong elapsed = (end.tv_sec - start.tv_sec) * 1000000; // Seconds
    elapsed += end.tv_usec - start.tv_usec; // Microseconds

    // 'calls' is a gsize, it can't be printed with %d
    INFO(
        "Calls = %" G_GSIZE_FORMAT ", Time = %ld, Frequency = %05f Time/Calls",
        xargs.calls, elapsed, (elapsed/(1.0 * xargs.calls))
    );
}
//
// This function recursively inserts nodes into a TreeStore. It takes as input
// XML Elements.
//
// The element is inserted under 'parent' at the position 'pos'. If the element
// has an attribute that's an ID then the name and the value of the first such
// attribute are stored in the columns DOM_COL_ID_NAME and DOM_COL_ID_VALUE.
// The child elements are then added recursively, the other kinds of children
// are skipped.
//
static void my_populate_tree_store (TreeRenderCtx *xargs, xmlNode *node, GtkTreeIter *parent, gint pos) {

    ++xargs->calls;

    gchar *node_name = my_get_node_name_prefixed(node, xargs->namespaces);

    SV *sv = NULL;
    if (xargs->namespaces) {
        // This part is optional because the C main wrapper used for testing can't
        // deal with the creation of an SV.
        sv = PmmNodeToSv(node, xargs->proxy);
    }

    // Find out if the node has an attribute that's an ID (the first one wins)
    xmlAttr *id_attr = NULL;
    for (xmlAttr *attr = node->properties; attr; attr = attr->next) {
        if (xmlIsID(node->doc, node, attr)) {
            id_attr = attr;
            break;
        }
    }

    // Add the current node
    GtkTreeIter iter;
    if (id_attr) {
        gchar *id_name = my_get_node_name_prefixed((xmlNode *) id_attr, xargs->namespaces);

        // If we pass 'attr' then the output will be "id='23'" instead of "23".
        // An attribute with an empty value (id="") has no children, there's
        // nothing to serialize in that case.
        gchar *id_value = id_attr->children
            ? my_to_string((xmlNode *) id_attr->children)
            : g_strdup("")
        ;

        gtk_tree_store_insert_with_values(
            xargs->store, &iter, parent, pos,
            DOM_COL_ICON, ICON_ELEMENT,
            DOM_COL_XML_POINTER, sv,
            DOM_COL_ELEMENT_NAME, node_name,
            // Add the columns ID_NAME and ID_VALUE
            DOM_COL_ID_NAME, id_name,
            DOM_COL_ID_VALUE, id_value,
            -1
        );

        g_free(id_name);
        g_free(id_value);
    }
    else {
        gtk_tree_store_insert_with_values(
            xargs->store, &iter, parent, pos,
            DOM_COL_ICON, ICON_ELEMENT,
            DOM_COL_XML_POINTER, sv,
            DOM_COL_ELEMENT_NAME, node_name,
            -1
        );
    }
    g_free(node_name);

    // Do the children, only the elements are counted in the position
    gint i = 0;
    for (xmlNode *child = node->children; child; child = child->next) {
        if (child->type == XML_ELEMENT_NODE) {
            my_populate_tree_store(xargs, child, &iter, i++);
        }
    }
}
//
// This function displays an XML node into a GtkTextBuffer. The XML nodes are
// displayed with their corresponding namespace prefix. The prefixes to use are
// taken from the given Perl hash.
//
// The text is appended at the end of the buffer, the current contents of the
// buffer are preserved.
//
// The XML is rendered with syntax highlighting. The GtkTextBuffer is expected
// to have the styles already predefined. The name of the styles to be used are:
//
//   XPath results:
//     result_count    - Margin counter used to identify each XPath result.
//     boolean         - Boolean result from an XPath expression.
//     number          - Numerical result from an XPath expression.
//     literal         - Literal result (string) from an XPath expression.
//
//   XML Elements
//     element         - An XML element (both opening and closing tag).
//     attribute_name  - The name of an attribute.
//     attribute_value - The value of an attribute.
//     namespace_name  - The name (prefix) of a namespace declaration.
//     namespace_uri   - The URI of a namespace declaration.
//
//   XML syntax
//     comment         - An XML comment.
//     dtd             - A DTD definition.
//     pi              - The name of a processing instruction.
//     pi_data         - The data of a processing instruction.
//     syntax          - Syntax tokens : <, >, &, ;, etc.
//     cdata           - A CDATA (both opening and closing syntax).
//     cdata_content   - The content of a CDATA.
//     entity_ref      - an entity reference.
//
void xacobeo_populate_gtk_text_buffer (GtkTextBuffer *buffer, xmlNode *node, HV *namespaces) {

    // Parameters validation
    if (buffer == NULL) {
        WARN("GtkTextBuffer is NULL");
        return;
    }

    TextRenderCtx xargs = {
        .buffer = buffer,
        .markup = my_get_buffer_tags(buffer),
        .namespaces = namespaces,
        .xml_data = g_string_sized_new(5 * 1024),
        .buffer_pos = 0,
        // A 400Kb document can require to apply up to 150 000 styles!
        .tags = g_array_sized_new(TRUE, TRUE, sizeof(ApplyTag), 200 * 1000),
        .calls = 0,
    };

    // Compute the current position in the buffer
    GtkTextIter iter;
    gtk_text_buffer_get_end_iter(buffer, &iter);
    xargs.buffer_pos = gtk_text_iter_get_offset(&iter);

    DEBUG("Displaying document with syntax highlighting");
    GTimeVal start;
    g_get_current_time(&start);

    // Render the XML document (the text and the tags are only collected)
    DEBUG("Computing syntax highlighting");
    my_display_document_syntax(&xargs, node);
    g_free(xargs.markup);

    // Copy the text into the buffer. The number of tags has to be remembered
    // now because my_render_buffer() frees the array.
    gsize tags = xargs.tags->len;
    DEBUG("Applying syntax highlighting");
    my_render_buffer(&xargs);

    GTimeVal end;
    g_get_current_time(&end);

    // Calculate the number of micro seconds spent since the last time
    glong elapsed = (end.tv_sec - start.tv_sec) * 1000000; // Seconds
    elapsed += end.tv_usec - start.tv_usec; // Microseconds

    // 'calls' and 'tags' are of type gsize, they can't be printed with %d
    INFO(
        "Calls = %" G_GSIZE_FORMAT ", Tags = %" G_GSIZE_FORMAT ", Time = %ld, Frequency = %05f Time/Calls",
        xargs.calls, tags, elapsed, (elapsed/(1.0 * xargs.calls))
    );
}
//
// Adds the contents of the XML document to the buffer and applies the syntax
// highlighting.
//
// The text collected in 'xml_data' is inserted at the end of the buffer in a
// single operation, then each entry of 'tags' is applied. An entry that has a
// name also gets two marks named "<name>|start" and "<name>|end".
//
// This function frees the data members 'xml_data' and 'tags' (and the names
// stored in the tags).
//
static void my_render_buffer (TextRenderCtx *xargs) {

    // Insert the whole text into the buffer
    GtkTextIter iter_end;
    gtk_text_buffer_get_end_iter(xargs->buffer, &iter_end);
    gtk_text_buffer_insert(
        xargs->buffer, &iter_end,
        xargs->xml_data->str, xargs->xml_data->len
    );
    g_string_free(xargs->xml_data, TRUE);

    // Apply each tag individually
    // It's a bit faster to emit the signal "apply-tag" than to call
    // gtk_text_buffer_apply_tag().
    guint signal_apply_tag_id = g_signal_lookup("apply-tag", GTK_TYPE_TEXT_BUFFER);
    for (guint i = 0; i < xargs->tags->len; ++i) {

        // The address of an element within the array is never NULL
        ApplyTag *to_apply = &g_array_index(xargs->tags, ApplyTag, i);

        GtkTextIter iter_start;
        gtk_text_buffer_get_iter_at_offset(xargs->buffer, &iter_start, to_apply->start);
        gtk_text_buffer_get_iter_at_offset(xargs->buffer, &iter_end, to_apply->end);

        if (to_apply->name) {
            gchar *name;

            name = g_strjoin("|", to_apply->name, "start", NULL);
            gtk_text_buffer_create_mark(xargs->buffer, name, &iter_start, TRUE);
            g_free(name);

            name = g_strjoin("|", to_apply->name, "end", NULL);
            gtk_text_buffer_create_mark(xargs->buffer, name, &iter_end, FALSE);
            g_free(name);

            g_free(to_apply->name);
        }

        // This is the bottleneck of the function. On the #gedit IRC channel it was
        // suggested that the highlight could be done in an idle callback.
        g_signal_emit(xargs->buffer, signal_apply_tag_id, 0, to_apply->tag, &iter_start, &iter_end);
    }

    g_array_free(xargs->tags, TRUE);
}
//
// Displays an XML document by walking recursively through the DOM. The XML is
// displayed in a GtkTextBuffer and rendered with a corresponding markup rule.
//
// This function dispatches the node to the function that knows how to render
// that type of node. A NULL node is rendered as an empty line with the style
// 'error'. A node of an unsupported type is reported with a warning and is not
// rendered.
//
static void my_display_document_syntax (TextRenderCtx *xargs, xmlNode *node) {

    if (node == NULL) {
        buffer_add(xargs, xargs->markup->error, "\n");
        // There's no node to dispatch, going further would dereference NULL
        return;
    }

    switch (node->type) {

        case XML_DOCUMENT_NODE:
            my_XML_DOCUMENT_NODE(xargs, node);
        break;

        case XML_HTML_DOCUMENT_NODE:
            my_XML_HTML_DOCUMENT_NODE(xargs, node);
        break;

        case XML_ELEMENT_NODE:
            my_XML_ELEMENT_NODE(xargs, node);
        break;

        case XML_ATTRIBUTE_NODE:
            my_XML_ATTRIBUTE_NODE(xargs, node);
        break;

        case XML_TEXT_NODE:
            my_XML_TEXT_NODE(xargs, node);
        break;

        case XML_COMMENT_NODE:
            my_XML_COMMENT_NODE(xargs, node);
        break;

        case XML_CDATA_SECTION_NODE:
            my_XML_CDATA_SECTION_NODE(xargs, node);
        break;

        case XML_PI_NODE:
            my_XML_PI_NODE(xargs, node);
        break;

        case XML_ENTITY_REF_NODE:
            my_XML_ENTITY_REF_NODE(xargs, node);
        break;

        case XML_DTD_NODE:
            my_XML_DTD_NODE(xargs, node);
        break;

        default:
            WARN("Unknown XML type %d for %s", node->type, node->name);
        break;
    }
}
// Displays a 'Document' node.
//
// The XML declaration isn't a node of the DOM, so a temporary processing
// instruction is built in order to render it. The children of the document are
// rendered afterwards, one per line.
static void my_XML_DOCUMENT_NODE (TextRenderCtx *xargs, xmlNode *node) {

    // Create the XML declaration <?xml version="" encoding=""?>
    xmlDoc *doc = (xmlDoc *) node;
    gchar *pi_data = g_strdup_printf(
        "version=\"%s\" encoding=\"%s\"",
        // Passing NULL to %s is undefined, fall back to sensible defaults
        doc->version ? (const gchar *) doc->version : "1.0",
        doc->encoding ? (const gchar *) doc->encoding : "UTF-8"
    );
    xmlNode *pi = xmlNewPI(BAD_CAST "xml", BAD_CAST pi_data);
    g_free(pi_data);

    if (pi) {
        my_display_document_syntax(xargs, pi);
        xmlFreeNode(pi);
    }
    else {
        // The declaration is skipped if the temporary node can't be created
        WARN("Can't create the XML declaration");
    }
    buffer_add(xargs, xargs->markup->syntax, "\n");

    for (xmlNode *child = node->children; child; child = child->next) {
        my_display_document_syntax(xargs, child);

        // Add some new lines between the elements of the prolog. Libxml removes
        // the white spaces in the prolog.
        if (child != node->last) {
            buffer_add(xargs, xargs->markup->syntax, "\n");
        }
    }
}
// Displays an HTML 'Document' node: each child of the document is rendered and
// a new line is written between two consecutive children.
static void my_XML_HTML_DOCUMENT_NODE (TextRenderCtx *xargs, xmlNode *node) {
    xmlNode *current = node->children;
    while (current != NULL) {
        my_display_document_syntax(xargs, current);

        // Libxml removes the white spaces between the top level nodes, so a
        // separator is added after each child except the last one.
        if (node->last != current) {
            buffer_add(xargs, xargs->markup->syntax, "\n");
        }

        current = current->next;
    }
}
// Displays an Element ex: <tag>...</tag>
//
// The output consists of the start tag (with the namespace declarations and
// the attributes) followed either by the children and the end tag or, if there
// are no children, by the short closing syntax "/>".
static void my_XML_ELEMENT_NODE (TextRenderCtx *xargs, xmlNode *node) {

    MarkupTags *styles = xargs->markup;
    gchar *tag_name = my_get_node_name_prefixed(node, xargs->namespaces);

    // Start tag, the element's name is associated with the node
    buffer_add(xargs, styles->syntax, "<");
    buffer_add_node(xargs, styles->element, node, tag_name);

    // Namespace definitions first, then the attributes
    xmlNs *ns = node->nsDef;
    while (ns != NULL) {
        my_XML_NAMESPACE_DECL(xargs, ns);
        ns = ns->next;
    }
    xmlAttr *attr = node->properties;
    while (attr != NULL) {
        my_XML_ATTRIBUTE_NODE(xargs, (xmlNode *) attr);
        attr = attr->next;
    }

    if (node->children == NULL) {
        // Empty element, ex: <empty />
        // TODO only elements defined as empty in the DTD should be empty. The
        // others should be written as: <no-content></no-content>
        buffer_add(xargs, styles->syntax, "/>");
        g_free(tag_name);
        return;
    }

    // The element has some content: <element>...</element>
    buffer_add(xargs, styles->syntax, ">");

    xmlNode *child = node->children;
    while (child != NULL) {
        my_display_document_syntax(xargs, child);
        child = child->next;
    }

    // End tag
    buffer_add(xargs, styles->syntax, "</");
    buffer_add(xargs, styles->element, tag_name);
    buffer_add(xargs, styles->syntax, ">");

    g_free(tag_name);
}
// Displays a Namespace declaration ex: <... xmlns:x="http://www.w3.org/1999/xhtml" ...>
//
// The prefix displayed is the one registered for the URI in the namespaces
// hash. If no prefix is found then the declaration is written as "xmlns".
static void my_XML_NAMESPACE_DECL (TextRenderCtx *xargs, xmlNs *ns) {

    const gchar *prefix = my_get_uri_prefix(ns->href, xargs->namespaces);
    gchar *declaration = prefix
        ? g_strconcat("xmlns:", prefix, NULL)
        : g_strdup("xmlns")
    ;

    // Name
    buffer_add(xargs, xargs->markup->syntax, " ");
    buffer_add(xargs, xargs->markup->namespace_name, declaration);
    g_free(declaration);

    // Value (the URI between quotes)
    buffer_add(xargs, xargs->markup->syntax, "=\"");
    buffer_add(xargs, xargs->markup->namespace_uri, (gchar *) ns->href);
    buffer_add(xargs, xargs->markup->syntax, "\"");
}
// Displays an Attribute ex: <... var="value" ...>
//
// The attribute is preceded by a space and its value is always written between
// double quotes.
static void my_XML_ATTRIBUTE_NODE (TextRenderCtx *xargs, xmlNode *node) {

    MarkupTags *styles = xargs->markup;

    // The name (with the namespace prefix)
    gchar *attr_name = my_get_node_name_prefixed(node, xargs->namespaces);
    buffer_add(xargs, styles->syntax, " ");
    buffer_add(xargs, styles->attribute_name, attr_name);
    g_free(attr_name);

    // The value
    buffer_add(xargs, styles->syntax, "=\"");
    my_XML_ATTRIBUTE_VALUE(xargs, node);
    buffer_add(xargs, styles->syntax, "\"");
}
//
// Displays the value of an attribute. This method is inspired by
// xmlGetPropNodeValueInternal(). This version adds the contents to the internal
// buffer and renders the entities of the attributes.
//
// For an attribute node each child is rendered. For an attribute declaration
// the default value is written. The other types of nodes are ignored.
//
static void my_XML_ATTRIBUTE_VALUE (TextRenderCtx *xargs, xmlNode *node) {

    switch (node->type) {

        case XML_ATTRIBUTE_NODE: {
            xmlNode *part = node->children;
            while (part != NULL) {
                my_display_document_syntax(xargs, part);
                part = part->next;
            }
        }
        break;

        case XML_ATTRIBUTE_DECL: {
            xmlAttribute *declaration = (xmlAttribute *) node;
            buffer_add(xargs, xargs->markup->attribute_value, (gchar *) declaration->defaultValue);
        }
        break;

        default:
            // Nothing to display
        break;
    }
}
// Displays a Text node, plain text in the document.
//
// This is tricky as plain text needs to have some characters (<, >, &, ' and ")
// escaped. Furthermore, not all characters need to be always escaped, for
// instance when the TEXT node is a direct child of an ELEMENT then < and & need
// to be escaped (> is optional as an XML parser should only look for the next
// opening tag). But if the TEXT node is within an ATTRIBUTE then the proper
// quotes also need to be escaped.
//
// This implementation always escapes &, < and >. The quotes (' and ") are only
// escaped when the parent is an attribute.
//
// Another important aspect is the visual representation. As TEXT nodes are used
// everywhere they don't have a dedicated style, instead their style is dictated
// by the parent node.
//
// A text node without content is rendered as an empty string.
//
static void my_XML_TEXT_NODE (TextRenderCtx *xargs, xmlNode *node) {

    // The type of text node rendering to do (Attribute, Element, etc)
    gboolean do_quotes = FALSE;
    GtkTextTag *markup = NULL; // NULL -> no style
    if (node->parent) {
        switch (node->parent->type) {
            case XML_ELEMENT_NODE:
                // Use the default values - Nothing more to do
            break;

            case XML_ATTRIBUTE_NODE:
            case XML_ATTRIBUTE_DECL:
                markup = xargs->markup->attribute_value;
                do_quotes = TRUE;
            break;

            default:
                WARN("Unhandled TEXT node for type %d", node->parent->type);
            break;
        }
    }

    // The content can be NULL, strlen() can't be called in that case
    const gchar *p = node->content ? (const gchar *) node->content : "";
    size_t length = strlen(p);
    const gchar *end = p + length;

    // The text should be added to a temporary buffer first and appended to the
    // main buffer (xargs->buffer) before rendering each entity. Otherwise each
    // character in the TEXT node will be tagged one by one! Of course the output
    // will be the same but it's overkill.
    GString *buffer = g_string_sized_new(length);

    // Scan the string for the characters to escape
    while (p < end) {

        // A truncated multi-byte sequence at the end of the string would make the
        // next character start after the terminator, never go past the end.
        const gchar *next = g_utf8_next_char(p);
        if (next > end) {
            next = end;
        }

        // Name of the entity that replaces the character (NULL -> keep as is)
        const gchar *entity = NULL;
        switch (*p) {
            case '&':
                entity = "amp";
            break;

            case '<':
                entity = "lt";
            break;

            case '>':
                entity = "gt";
            break;

            case '\'':
                if (do_quotes) {
                    entity = "apos";
                }
            break;

            case '"':
                if (do_quotes) {
                    entity = "quot";
                }
            break;

            default:
            break;
        }

        if (entity) {
            my_add_text_and_entity(xargs, buffer, markup, entity);
        }
        else {
            // Keep the UTF-8 character unchanged
            g_string_append_len(buffer, p, next - p);
        }

        p = next;
    }

    // Write the last bytes in the buffer
    buffer_add(xargs, markup, buffer->str);
    g_string_free(buffer, TRUE);
}
//
// Helper function for my_XML_TEXT_NODE(). The text pending in the temporary
// buffer is flushed into the main buffer with the given markup, the temporary
// buffer is emptied and finally the entity is written.
//
static void my_add_text_and_entity (TextRenderCtx *xargs, GString *buffer, GtkTextTag *markup, const gchar *entity) {
    // Flush the pending text (there's no node associated with it)
    my_buffer_add(xargs, markup, NULL, buffer->str);
    g_string_truncate(buffer, 0);

    // The entity goes right after the text
    my_XML_ENTITY_REF_VALUE(xargs, entity);
}
// Displays a Comment ex: <!-- comment -->
//
// The whole comment (delimiters included) is written as a single chunk with
// the style 'comment'.
static void my_XML_COMMENT_NODE (TextRenderCtx *xargs, xmlNode *node) {
    // A NULL content would end the concatenation before the closing "-->"
    const gchar *text = node->content ? (const gchar *) node->content : "";
    buffer_cat(xargs, xargs->markup->comment, "<!--", text, "-->");
}
// Displays a CDATA section ex: <![CDATA[<greeting>Hello, world!</greeting>]]>
//
// The delimiters and the content are written with different styles.
static void my_XML_CDATA_SECTION_NODE (TextRenderCtx *xargs, xmlNode *node) {
    MarkupTags *styles = xargs->markup;
    const gchar *data = (const gchar *) node->content;

    buffer_add(xargs, styles->cdata, "<![CDATA[");
    buffer_add(xargs, styles->cdata_content, data);
    buffer_add(xargs, styles->cdata, "]]>");
}
// Displays a PI (processing instruction) ex: <?stuff ?>
//
// The data of the processing instruction (and the space that precedes it) is
// only written if there's some.
static void my_XML_PI_NODE (TextRenderCtx *xargs, xmlNode *node) {
    MarkupTags *styles = xargs->markup;

    // Opening syntax and target
    buffer_add(xargs, styles->syntax, "<?");
    buffer_add(xargs, styles->pi, (gchar *) node->name);

    if (node->content != NULL) {
        buffer_add(xargs, styles->syntax, " ");
        buffer_add(xargs, styles->pi_data, (gchar *) node->content);
    }

    // Closing syntax
    buffer_add(xargs, styles->syntax, "?>");
}
// Displays an Entity ex: &entity;
//
// The name of the node is used as the name of the entity.
static void my_XML_ENTITY_REF_NODE (TextRenderCtx *xargs, xmlNode *node) {
    const gchar *entity_name = (const gchar *) node->name;
    my_XML_ENTITY_REF_VALUE(xargs, entity_name);
}
// Performs the actual display of an Entity ex: &my-chunk;
//
// The delimiters (& and ;) are written with the style 'syntax' and the name
// with the style 'entity_ref'.
static void my_XML_ENTITY_REF_VALUE (TextRenderCtx *xargs, const gchar *name) {
    MarkupTags *styles = xargs->markup;

    buffer_add(xargs, styles->syntax, "&");
    buffer_add(xargs, styles->entity_ref, name);
    buffer_add(xargs, styles->syntax, ";");
}
// Displays a DTD ex: <!DOCTYPE NewsML PUBLIC ...>
//
// The DTD is serialized as a single string and written with the style 'dtd'.
static void my_XML_DTD_NODE (TextRenderCtx *xargs, xmlNode *node) {
    // TODO the DTD node has children, so it's possible to have more advanced
    // syntax highlighting.
    gchar *serialized = my_to_string(node);
    buffer_add(xargs, xargs->markup->dtd, serialized);
    g_free(serialized);
}
//
// Returns the node name with the right prefix based on the namespaces declared
// in the document. If the node has no namespace, or if no prefix is found for
// its namespace, then the node name is returned without a prefix.
//
// This function always returns a new string that has to be freed with g_free().
//
static gchar* my_get_node_name_prefixed (xmlNode *node, HV *namespaces) {

    const gchar *local_name = (const gchar *) node->name;

    // Get the prefix corresponding to the namespace (if there's a namespace)
    const gchar *prefix = NULL;
    if (node->ns) {
        prefix = my_get_uri_prefix(node->ns->href, namespaces);
    }

    if (prefix == NULL) {
        // No prefix to add, a copy of the name is returned
        return g_strdup(local_name);
    }

    return g_strconcat(prefix, ":", local_name, NULL);
}
//
// Returns the prefix to use for the given URI. The prefix is chosen based on
// the namespaces declared in the document (the hash uses the URIs as keys and
// the prefixes as values). If the prefix can't be found then NULL is returned.
//
// NULL is also returned, without a warning, if there's no URI, if the URI is
// empty (reset to the default namespace) or if there's no hash of namespaces.
//
// The string returned by this function shouldn't be modified nor freed.
//
static const gchar* my_get_uri_prefix (const xmlChar *uri, HV *namespaces) {

    // Without a URI or without the hash there's nothing to look up. The hash
    // is NULL when the code is called without Perl (C wrapper used for testing).
    if (uri == NULL || namespaces == NULL) {
        return NULL;
    }

    // Reset of the namespace to the default namespace
    if (xmlStrEqual(uri, BAD_CAST "")) {
        return NULL;
    }

    // Get the prefix corresponding to the namespace
    SV **svPtr = hv_fetch(namespaces, (gchar *) uri, xmlStrlen(uri), FALSE);
    if (!svPtr) {
        // Can't find the prefix of the URI
        WARN("Can't find namespace for URI %s", uri);
        return NULL;
    }

    if (SvTYPE(*svPtr) != SVt_PV) {
        // Prefix isn't a string, something else was stored in the hash
        WARN("No valid namespace associated with URI %s, got: '%s'", uri, SvPV_nolen(*svPtr));
        return NULL;
    }

    // Ok found the prefix!
    return SvPVX(*svPtr);
}
//
// Returns a string representation of the given node (the node is serialized
// with xmlNodeDump()).
//
// This function returns a string that has to be freed with g_free(). NULL is
// returned if there's no node or if the buffer used for the serialization
// can't be created; g_free() accepts NULL.
//
static gchar* my_to_string (xmlNode *node) {

    if (node == NULL) {
        return NULL;
    }

    // Get the text representation of the XML node
    xmlBuffer *buffer = xmlBufferCreate();
    if (buffer == NULL) {
        WARN("Can't create an xmlBuffer");
        return NULL;
    }

    // The global setting is restored once the node is serialized
    int old_indent = xmlIndentTreeOutput;
    xmlIndentTreeOutput = 1;
    int level = 0;
    int format = 0;
    xmlNodeDump(buffer, node->doc, node, level, format);
    xmlIndentTreeOutput = old_indent;

    // Transform the string to a glib string
    const gchar *content = (const gchar *) xmlBufferContent(buffer);
    gchar *string = g_strdup(content);
    xmlBufferFree(buffer);

    return string;
}
//
// Adds a text chunk to the buffer. The text is added with a markup tag (style).
//
// Normally the function gtk_text_buffer_insert_with_tags() should be used for
// this purpose. The problem is that inserting data by chunks into the text
// buffer is really slow. Also applying the style elements is taking a lot of
// time.
//
// So far the best way for inserting the data into the buffer is to collect it
// all into a string and to add the single string with the contents of the
// document into the buffer. Once the buffer is filled the styles can be
// applied.
//
// Parameters:
//   xargs - The rendering context, the text is appended to 'xml_data' and the
//           style is recorded in 'tags'.
//   tag   - The style to apply to the text; if NULL then no style is recorded.
//   node  - Optional node to associate with the text. If there's one then its
//           mark (see xacobeo_get_node_mark()) is recorded with the style.
//   text  - The text to add, NULL is handled as an empty string.
//
static void my_buffer_add (TextRenderCtx *xargs, GtkTextTag *tag, xmlNode *node, const gchar *text) {

    const gchar *content = text ? text : "";

    ++xargs->calls;
    g_string_append(xargs->xml_data, content);

    // We don't want the length of the string but the number of characters.
    // UTF-8 may encode one character as multiple bytes.
    glong end = xargs->buffer_pos + g_utf8_strlen(content, -1);

    // Apply the markup if there's a tag
    if (tag) {
        ApplyTag to_apply = {
            .tag = tag,
            .start = xargs->buffer_pos,
            .end = end,
            // The name is owned by the array's entry (my_render_buffer() frees
            // it). It is only created here, when there's an entry to own it,
            // otherwise the string would be leaked.
            .name = node ? xacobeo_get_node_mark(node) : NULL,
        };
        g_array_append_val(xargs->tags, to_apply);
    }

    xargs->buffer_pos = end;
}
//
// Gets the markup rules to use for rendering the XML with syntax highlighting.
// The markup rules are expected to be already defined in the buffer as tags.
// Each field of the structure is looked up in the buffer's tag table under the
// name of the field.
//
// This function returns an object that has to be freed with g_free().
//
static MarkupTags* my_get_buffer_tags (GtkTextBuffer *buffer) {

    MarkupTags *markup = g_new0(MarkupTags, 1);
    GtkTextTagTable *table = gtk_text_buffer_get_tag_table(buffer);

// The name of the tag is the name of the field
#define MY_LOOKUP_TAG(field) \
    markup->field = gtk_text_tag_table_lookup(table, #field)

    MY_LOOKUP_TAG(result_count);
    MY_LOOKUP_TAG(boolean);
    MY_LOOKUP_TAG(number);
    MY_LOOKUP_TAG(literal);

    MY_LOOKUP_TAG(attribute_name);
    MY_LOOKUP_TAG(attribute_value);

    MY_LOOKUP_TAG(comment);
    MY_LOOKUP_TAG(dtd);
    MY_LOOKUP_TAG(element);
    MY_LOOKUP_TAG(pi);
    MY_LOOKUP_TAG(pi_data);
    MY_LOOKUP_TAG(syntax);
    MY_LOOKUP_TAG(cdata);
    MY_LOOKUP_TAG(cdata_content);

    MY_LOOKUP_TAG(entity_ref);

    MY_LOOKUP_TAG(namespace_name);
    MY_LOOKUP_TAG(namespace_uri);

    MY_LOOKUP_TAG(error);

#undef MY_LOOKUP_TAG

    return markup;
}
//
// Computes the index to use in a path for the given element. The index is the
// position (starting at 1) of the element among the sibling elements that have
// the same name and the same namespace. If the element is the only one of its
// kind then 0 is returned, which means that no index is needed.
//
static int my_get_sibling_index (xmlNode *node) {

    // Count the previous siblings that are similar to the element
    int preceding = 0;
    for (xmlNode *sibling = node->prev; sibling; sibling = sibling->prev) {
        if (ELEMENT_MATCH(sibling, node)) {
            ++preceding;
        }
    }
    if (preceding > 0) {
        // XPath starts its indexes at 1 instead of 0
        return preceding + 1;
    }

    // No previous sibling, but maybe the element is the first of a series!
    // Let's scan in the other direction.
    for (xmlNode *sibling = node->next; sibling; sibling = sibling->next) {
        if (ELEMENT_MATCH(sibling, node)) {
            return 1;
        }
    }

    return 0;
}


//
// Returns the path of a node. The path is expected to be unique for each node.
//
// The path is built from the top of the document down to the node. A document
// node adds a '/', an element adds its prefixed name (and an index if the name
// isn't unique among its siblings). The other types of nodes are reported with
// a warning and add nothing to the path.
//
// This function returns a string that has to be freed with g_free(). If the
// node is NULL then NULL is returned.
//
gchar* xacobeo_get_node_path (xmlNode *origin, HV *namespaces) {

    if (origin == NULL) {
        return NULL;
    }

    // Collect the ancestors, the list ends up ordered from top to bottom
    GSList *ancestors = NULL;
    for (xmlNode *current = origin; current; current = current->parent) {
        ancestors = g_slist_prepend(ancestors, current);
    }

    // Build the path to the node
    GString *path = g_string_sized_new(32);
    gboolean need_separator = FALSE;
    for (GSList *item = ancestors; item; item = item->next) {
        xmlNode *node = (xmlNode *) item->data;

        if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) {
            g_string_append_c(path, '/');
        }
        else if (node->type == XML_ELEMENT_NODE) {

            // The first element follows the '/' of the document
            if (need_separator) {
                g_string_append_c(path, '/');
            }
            need_separator = TRUE;

            gchar *name = my_get_node_name_prefixed(node, namespaces);
            g_string_append(path, name);
            g_free(name);

            // If the node name is not unique we must add an index
            int index = my_get_sibling_index(node);
            if (index) {
                g_string_append_printf(path, "[%d]", index);
            }
        }
        else {
            WARN("Unknown XML type %d for %s", node->type, node->name);
        }
    }

    g_slist_free(ancestors);

    // The string is handed over to the caller, only the wrapper is freed
    return g_string_free(path, FALSE);
}
//
// Returns an identifier for a given node. The identifier is the address of the
// node formatted as a string.
//
// This function returns a string that has to be freed with g_free().
//
gchar* xacobeo_get_node_mark (xmlNode *node) {
    void *address = (void *) node;
    gchar *mark = g_strdup_printf("%p", address);
    return mark;
}