Simplewiki  1.1
parser.php
Go to the documentation of this file.
1 <?php
2 // Muster Software Copyright (c) Henrik Bechmann, Toronto, Canada 2009-2012. All rights reserved.
3 // See "musterlicence.txt" for licencing information.
4 // mustersoftware.net
5 
6 namespace Muster\Simplewiki;
7 
8 use StdClass;
9 
14 #==========================================================================
15 #-----------------------------[ PARSER ]----------------------------------
16 #==========================================================================
17 
31 class Parser
32 {
/** regex for inline markup inside link captions (includes images, excludes links) */
protected $link_inline_re;
/** regex for inline markup inside image captions (includes links, excludes images) */
protected $image_inline_re;
/** regex splitting a collected list into its items */
protected $item_inline_re;
/** regex splitting a collected definition list into its items */
protected $defitem_inline_re;
/** regex for individual table cells */
protected $cell_re;
/** regex for one term of a decorator string (see get_decoration()) */
protected $decorator_re;
/** regex for the special-case "~}}}" escape inside preformatted text */
protected $pre_escape_re;
/** StdClass holding the individual rule fragments built by _set_rules() */
protected $_rules;
/** regex recognising block-level elements */
protected $block_re;
/** regex recognising inline elements */
protected $inline_re;
/**
 * regex used to set aside links, macros, code and images within a table row.
 * Assigned in _set_re(); declared here so that it is no longer created as an
 * undeclared (dynamic) property.
 */
protected $tablerow_setaside_re;

/** the most recent document node (current insertion point) */
protected $_curnode;
/** the text node that inline characters are appended to, or NULL */
protected $_leaftextnode;
/** top-level 'document' node of the tree being built */
protected $_root;

/** markup exactly as passed to prepare() */
protected $_raw;
/** markup after preprocess_raw_markup(); this is what parse() consumes */
protected $_preprocessed_raw;
/** character class allowed in decorator arguments */
protected $_argchars = '[^()]'; // no parenthesis; for any char, use '(?s:.)' (any char including newline)
// protected $_argchars = '\\w\\s:="\'%\\\#.-'; // for decorators, notably parentheses omitted for security
/** decoration parsed from the first-line ```## arguments */
protected $_metadata; // from first line ```## arguments
/** StdClass with marker count, markers and text length collected while preprocessing */
protected $_markerdata; // from preprocessing
/** working list of markers; only populated while preprocess_raw_markup() runs */
protected $_markers;
/**
 * Compile the markup grammar and load the initial markup.
 *
 * The rules and regular expressions are built first because prepare()
 * preprocesses the text using $this->_rules.
 *
 * @param mixed $text markup handed on to prepare()
 */
public function __construct($text)
{
    // grammar first ...
    $this->_set_rules();
    $this->_set_re($this->_rules);

    // ... then the document itself
    $this->prepare($text);
}
/**
 * Build the table of regular-expression fragments that make up the markup grammar.
 *
 * Each fragment is stored as a property of a StdClass object, which is then saved
 * in $this->_rules. The fragments carry no delimiters or flags; _set_re() wraps and
 * combines them (always with the "x" flag, so whitespace inside them is layout only).
 * The first named group of a rule is the name _create_node() uses to pick the
 * "_<name>_node" handler. Decorator arguments are limited to $this->_argchars.
 */
104  protected function _set_rules()
105  {
106  // the first group name of each rule, if set, is used by controller (_create_node($preg_groups))
107  // for further processing of parsed data
108  $rules = new StdClass;
109  $argchars = $this->_argchars;
110  #================================[ basic processing ]=================================#
111  # no explicit action by user (other than include blank lines between blocks)
112 // $rules->char = '(?P<char> . )'; // slower, but allows capture of raw url's
113 // $rules->char = '(?P<char> ([\\w\\s]+$|. ))'; //faster, but misses raw url's, twice as slow as markup filter
114  // markup characters: =/*-\[]|#{}%:<>~
117  $rules->char = '(?P<char> ([^\/\*\\\[{%<~]+|.))'; // characters before next inline markup start, or markup start char
119  $rules->blankline = '(?P<blankline> ^ \s* $ )';
121  $rules->paragraph = '(?P<paragraph>
122  ^\s*(\|:p\s+(?P<paragraph_decorator>'.$argchars.'+?):\|)?(?P<text_chars>.+)
123  |
124  (?P<text_charstream>.+)
125  )';
126  #================================[ core markup ]===============================#
127  #--------------------------------[ basic markup ]------------------------------#
128 
129  $rules->heading = '(?P<heading>
130  ^\s*(\|:h\s+(?P<heading_decorator>'.$argchars.'+?):\|)? \s*
131  (?P<heading_head>={1,6}) \s*
132  (?P<heading_text> .*? ) \s*
133  (?P<heading_tail>=*) \s*
134  $
135  )';
 // "//" toggles emphasis unless preceded by the escape character "~"
140  $rules->emph = '(?P<emph> (?<!~)\/\/ )';
142  $rules->strong = '(?P<strong> \*\* )';
 // the eight backslashes below reduce to a regex matching two literal backslashes
144  $rules->linebreak = '(?P<linebreak> \\\\\\\\ )';
145  // \b horizontalrule: horizontal rule: "-----*"
146  $rules->horizontalrule = '(?P<horizontalrule>
147  ^ \s* -----* \s* $
148  )';
149  #--------------------------------[ links ]-------------------------------------#
150 
151  $rules->link = '(?P<link>
152  (%l\s+(?P<link_decorator>'.$argchars.'+?)%)?
153  \[\[
154  (?P<link_target>\S+?) \s*
155  (\| \s* (?P<link_text>.*?) \s* (\| \s* (?P<link_title>[^|\]}]+))? \s*)?
156  \]\](?!]) # allow embedded "]"
157  )';
158  #--------------------------------[ images ]-------------------------------------#
159 
160  $rules->image = '(?P<image>
161  (%i\s+(?P<image_decorator>'.$argchars.'+?)%)?
162  {{
163  (?P<image_target>\S+?) \s*
164  (\| \s* (?P<image_text>.*?) \s* (\| \s* (?P<image_title>[^|\]}]+))? \s*)?
165  }}
166  )';
167  #--------------------------------[ lists ]-------------------------------------#
168 
169 
 // deflist collects a whole run of ":" lines; defitem then splits it per line
170  $rules->deflist = '(?P<deflist>
171  ^ \s*
172  (\|:d[ltd]\s+('.$argchars.'+?):\|){0,3}
173  (:(?=[^:])).* $ # only one opening list marker allowed
174  (\n [\t\x20]*
175  (\|:d[ltd]\s+('.$argchars.'+?):\|){0,3}
176  :+.* $
177  )*
178  )';
180  $rules->defitem = '(?P<defitem>
181  ^\s*
182  (\|:dl\s+(?P<deflist_decorator>('.$argchars.'+?)):\|)?
183  (\|:dt\s+(?P<defterm_decorator>('.$argchars.'+?)):\|)?
184  (\|:dd\s+(?P<defdesc_decorator>('.$argchars.'+?)):\|)?
185  (?P<defitem_head> :+) \s*
186  ((?P<defterm_text> .*?)(?<!~)::)?
187  (?P<defdesc_text> .*?)
188  \s*$
189  )';
 // list collects a whole run of "*" / "#" lines; item then splits it per line
192  $rules->list = '(?P<list>
193  ^\s*
194  (\|:([uo]l|li) \s+('.$argchars.'+?):\|){0,2}
195  ([*\\#](?=[^*\\#])).* $ # only one opening list marker allowed
196  (\n [\t\x20]*
197  (\|:([uo]l|li) \s+('.$argchars.'+?):\|){0,2}
198  [*\#]+.* $
199  )*
200  )';
202  $rules->item = '(?P<item>
203  ^\s*
204  (\|:[uo]l\s+(?P<list_decorator> ('.$argchars.'+?)):\|)?
205  (\|:li\s+(?P<item_decorator>('.$argchars.'+?)):\|)?
206  (?P<item_head> [\#*]+) \s*
207  (?P<item_text> .*)
208  \s*$
209  )';
210  #--------------------------------[ tables ]-------------------------------------#
211 
 // note: "\\s" in the table decorator below is "\s" once PHP has processed the
 // single-quoted string, i.e. the same as the "\s" used by the other rules
212  $rules->table = '(?P<table>
213  ^\s*
214  (\|:table\\s+(?P<table_decorator>('.$argchars.'+?)):\|)?
215  (\|:tr\s+(?P<row_decorator>('.$argchars.'+?)):\|)?
216  \s*
217  (?P<table_row>
218  (((?<!~)\|:t[dh]\s+('.$argchars.'+?):\|)?\|(?!:[a-z]).*?)* \s*
219  \| \s*
220  )
221  \s*$
222  )';
224  // positive lookbehind for ~ catches escaped vertical bars for inclusion in extraction
225  $rules->cell = '
226  (\|:t[dh]\s+(?P<cell_decorator>'.$argchars.'+?):\|)?
227  \| \s*
228  (
229  (?P<head> = ([^|]|(?<=~)\|)+ ) |
230  (?P<cell> ([^|]|(?<=~)\|)+ )
231  ) \s*
232  ';
233  #================================[ escape character ]=================================#
234 
235  $rules->escape = '(?P<escape> ~ (?P<escaped_char>\S) )'; # embedded in various regex's
236  #================================[ special decorators ]===============================#
237  #--------------------------------[ span decoration ]----------------------------------#
239  $rules->span = '(?P<span> %(s\s+(?P<span_decorator>'.$argchars.'+?))?% )';
240  #--------------------------------[ block dividers ]-----------------------------------#
242  $rules->blockdivider = '(?P<blockdivider>
243  ^\s* \|:b \s+(?P<blockdivider_decorator>('.$argchars.'+?)):\| \s* $
244  )'; # generic block
245  #===============================[ preformatted text ]=================================#
246  // inline
248  $rules->code = '(?P<code>
249  (%c\s+(?P<code_decorator>'.$argchars.'+?)%)?{{{ (?P<code_text>.*?) }}}
250  )';
252  $rules->pre = '(?P<pre>
253  ^\s*(\|:pre\s+(?P<pre_decorator>'.$argchars.'+?):\|)?(?<!~){{{ \s* $
254  (\n)?
255  (?P<pre_markup>
256  ([\#]!(?P<pre_type>\w*?)(\s+.*)?$)?
257  (.|\n)+?
258  )
259  \n?
260  ^\s*}}} \s*$
261  )';
263  $rules->pre_escape = ' ^(?P<indent>\s*) ~ (?P<rest> \}\}\} \s*) $';
264  #================================[ advanced markup ]===============================#
265  #--------------------------------[ block declarations ]------------------------------#
 // a block runs from "(:selector ... :)" to the matching "(:selectorend:)" at the same indent
268  $rules->blockdef = '
269  ^(?P<blockdef>
270  \n?(?P<block_indent>[\t\x20]*)\(:(?P<block_selector>\w+)(\s+(?!:\))(?P<block_decorator>'.$argchars.'+?))? \s* :\)
271  \s*?(?P<block_inline>.*) $
272  (?P<block_content>(\n.*$)*?)
273  \n(?P=block_indent)\(:(?P=block_selector)end\s*:\)\s*$
274  )';
275 
276  #--------------------------------[ macros ]--------------------------------#
278  $rules->macro = '(?P<macro>
279  <<
280  (?P<macro_name> \w+)
281  ((?P<macro_args> '.$argchars.'*) )? \s*
282  (\| \s* (?P<macro_text> .+?) \s* )?
283  >>
284  )';
286  $rules->blockmacro = '(?P<blockmacro>
287  ^ \s*
288  <<
289  (?P<blockmacro_name> \w+)
290  ((?P<blockmacro_args> '.$argchars.'*) )? \s*
291  (\| \s* (?P<blockmacro_text> .+?) \s* )?
292  >> \s*
293  $
294  )';
 // one decorator term: optional "name:" / "name=" prefix followed by a quoted or bare value
296  $rules->decorator = '
297  (?>(?P<variable>[\w-]+)(?P<operator>[:=]))? # optional attribute or property name, and operator applied
298  (
299  "(?P<ddelim_value>.*?)(?<!\\\)" # double quote delimited
300  |
301  \'(?P<sdelim_value>.*?)(?<!\\\)\' # single quote delimited
302  |
303  (?P<ndelim_value>\S+) # not delimited
304  )
305  ';
306  $this->_rules = $rules;
307  }
308  #---------------------------------------------------------------------------------------#
309  #------------------------------[ set regular expressions ]------------------------------#
310  #---------------------------------------------------------------------------------------#
311 
/**
 * Turn the rule fragments into complete, delimited regular expressions.
 *
 * Single-rule patterns are wrapped as "/<rule>/<flags>". Multi-rule patterns are
 * an alternation of fragments separated by newlines (harmless under the "x" flag).
 * The order of fragments in each alternation is the matching priority: from least
 * to most general.
 *
 * @param object $rules rule fragments as produced by _set_rules()
 */
protected function _set_re($rules)
{
    // build "/\n a \n|\n b ... \n/<flags>" from a list of fragments
    $any_of = function (array $fragments, $flags)
    {
        return "/\n" . implode("\n|\n", $fragments) . "\n/" . $flags;
    };

    // inline markup common to every inline context, most general last
    $common_inline = array(
        $rules->strong, $rules->emph, $rules->span,
        $rules->linebreak, $rules->escape, $rules->char,
    );

    // For special case pre escaping, in creole 1.0 done with ~:
    $this->pre_escape_re = '/' . $rules->pre_escape . '/xm';

    // link captions: images allowed, nested links are not
    $this->link_inline_re = $any_of(
        array_merge(array($rules->code, $rules->image), $common_inline), 'x');

    // image captions: links allowed, nested images are not
    $this->image_inline_re = $any_of(
        array_merge(array($rules->link, $rules->code), $common_inline), 'x');

    // per-line splitters for collected lists, and the table cell splitter
    $this->item_inline_re = '/' . $rules->item . '/xm';
    $this->defitem_inline_re = '/' . $rules->defitem . '/xm';
    $this->cell_re = '/' . $rules->cell . '/x';

    // general inline elements
    $this->inline_re = $any_of(
        array_merge(
            array($rules->link, $rules->macro, $rules->code, $rules->image),
            $common_inline),
        'x');

    // elements set aside while a table row is split into cells
    $this->tablerow_setaside_re = $any_of(
        array($rules->link, $rules->macro, $rules->code, $rules->image), 'x');

    // block elements; paragraph is the catch-all and therefore last
    $this->block_re = $any_of(
        array(
            $rules->blankline, $rules->blockdef, $rules->heading,
            $rules->horizontalrule, $rules->blockdivider, $rules->blockmacro,
            $rules->pre, $rules->list, $rules->deflist, $rules->table,
            $rules->paragraph,
        ),
        'xm');

    $this->decorator_re = '/' . $rules->decorator . '/x';
}
/**
 * Accessor for the document metadata.
 *
 * @return mixed the decoration object stored by preprocess_metadata(), or
 *               whatever $this->_metadata currently holds if none was found
 */
public function metadata()
{
    return $this->_metadata;
}
/**
 * Accessor for the marker data gathered by preprocess_raw_markup().
 *
 * @return mixed the object stored in $this->_markerdata
 */
public function markerdata()
{
    return $this->_markerdata;
}
/**
 * Accessor for the markup as it stands after preprocessing.
 *
 * preprocess_raw_markup() stores its result in $this->_preprocessed_raw. The
 * previous implementation read $this->_preprocessed_markup, a property that is
 * never declared or assigned, so it always yielded NULL.
 *
 * @return mixed the preprocessed markup, or NULL before prepare() has run
 */
public function preprocessed_markup()
{
    return $this->_preprocessed_raw;
}
/**
 * Get, or set and get, the character class allowed in decorator arguments.
 *
 * Passing a value replaces the class and rebuilds all rules and regular
 * expressions so the new class takes effect.
 *
 * @param mixed $argchars new character class, or NULL to only read the current one
 * @return mixed the character class in effect after the call
 */
public function argchars($argchars = NULL)
{
    // read-only call: nothing to rebuild
    if (is_null($argchars))
    {
        return $this->_argchars;
    }

    $this->_argchars = $argchars;
    // recompile regex
    $this->_set_rules();
    $this->_set_re($this->_rules);

    return $this->_argchars;
}
416  #---------------------[ process initiation ]--------------------------#
417 
/**
 * Load markup into the parser and preprocess it, ready for parse().
 *
 * Starts a fresh document tree and clears the state left by any previous
 * document: metadata (otherwise a document without a metadata line would report
 * the previous document's metadata) and the set-aside preformatted blocks
 * (otherwise their patterns accumulate across calls).
 *
 * @param mixed $markup raw markup
 * @return $this for call chaining
 */
public function prepare($markup)
{
    $this->_raw = $markup;
    $this->_root = new DocNode(DocNode::DOCUMENT); # 'document' is the top level node
    $this->_curnode = $this->_root; # The most recent document node
    $this->_leaftextnode = NULL; # The node to add inline characters to

    # forget per-document preprocessing state from an earlier prepare()
    $this->_metadata = NULL;
    $this->_pre_markers = array();
    $this->_pre_markup = array();
    $this->_pre_count = 0;

    # result is kept in $this->_preprocessed_raw by the callee
    $this->preprocess_raw_markup($this->_raw);
    return $this;
}
/**
 * Run the parser over the preprocessed markup.
 *
 * @return mixed the root node of the document tree ($this->_root)
 */
public function parse() // initiate parsing
{
    // block-level pass; inline parsing is triggered from the block handlers
    $this->_parse_block($this->_preprocessed_raw);

    // the handlers have hung their nodes below the root
    return $this->_root;
}
/**
 * Recognise block elements in the given markup.
 *
 * preg_replace_callback() is used purely as a "for each match" driver:
 * _create_node() is invoked per match and the replaced string is discarded.
 *
 * @param mixed $raw markup to scan with $this->block_re
 */
protected function _parse_block($raw)
{
    $dispatch = array($this, '_create_node');
    preg_replace_callback($this->block_re, $dispatch, $raw);
}
/**
 * Recognise inline elements inside a block's text.
 *
 * As in _parse_block(), the regex engine only drives the iteration;
 * _create_node() does the work for each match and the result is discarded.
 *
 * @param mixed $raw text to scan with $this->inline_re
 */
protected function _parse_inline($raw)
{
    $dispatch = array($this, '_create_node');
    preg_replace_callback($this->inline_re, $dispatch, $raw);
}
482  #---------------------[ process control ]--------------------------#
483 
/**
 * Controller: route one regex match to its "_<name>_node" handler.
 *
 * The first named group with non-empty text names the rule that matched, and
 * the handler "_<name>_node" is called with the full group array.
 *
 * A blank line matches with every group empty, so it is detected separately:
 * the match array is then [0, 'blankline', 1] and 'blankline' is the
 * second-to-last key. That key is now compared strictly (an integer key is
 * never loosely equated with the string) and only when it exists.
 *
 * @param array $preg_groups match array supplied by preg_replace_callback()
 */
protected function _create_node($preg_groups) // controller
{
    # Invoke appropriate _*_node method. Called for every matched group.
    foreach ($preg_groups as $name => $text)
    {
        if (is_string($name) and ((string) $text !== ''))
        {
            $node_method = "_{$name}_node";
            $this->$node_method($preg_groups);
            return;
        }
    }

    # special case: pick up empty line for block boundary
    $keys = array_keys($preg_groups);
    $keycount = count($keys);
    if ($keycount >= 2 and $keys[$keycount - 2] === 'blankline') // last name in key array indicates returned as found
    {
        $this->_blankline_node($preg_groups);
    }
}
517 
/** patterns matching the placeholders handed out by add_pre_and_code_markers() */
protected $_pre_markers = array();
/** set-aside preformatted/code markup, index-aligned with $_pre_markers */
protected $_pre_markup = array();
/** number of placeholders handed out so far */
protected $_pre_count = 0;

/**
 * preg_replace_callback() handler: set aside one preformatted block or inline
 * code span and return a numbered placeholder for it.
 *
 * The saved markup is later used as a preg_replace() replacement string, so any
 * "$" or "\" that is followed by a digit is backslash-escaped to keep it from
 * being read as a backreference.
 *
 * @param array $preg_groups match array; 'pre' or 'code' holds the matched markup
 * @return string placeholder of the form "{{{" . chr(255) . <count> . "}}}"
 */
protected function add_pre_and_code_markers($preg_groups)
{
    $pre = isset($preg_groups['pre']) ? $preg_groups['pre'] : '';
    $code = isset($preg_groups['code']) ? $preg_groups['code'] : '';

    // one or the other is set; escape backreference markup
    $this->_pre_markup[] = preg_replace('/(\$|\\\\)(?=\d)/', '\\\\\1', $pre . $code);

    $this->_pre_count++;
    $token = chr(255) . $this->_pre_count;

    $this->_pre_markers[] = '/{{\\{' . $token . '\\}}}/';
    return '{{{' . $token . '}}}';
}
/** patterns matching the placeholders handed out by add_tablerow_markers() */
protected $_tablerow_markers = array();
/** set-aside table row markup, index-aligned with $_tablerow_markers */
protected $_tablerow_markup = array();
/** number of table row placeholders handed out so far */
protected $_tablerow_count = 0;

/**
 * preg_replace_callback() handler: set aside a link, macro, code span or image
 * found in a table row and return a numbered placeholder for it.
 *
 * @param array $preg_groups match array; the first non-empty of 'link', 'macro',
 *                           'code', 'image' is the markup that gets saved
 * @return string placeholder of the form "{{{" . chr(255) . <count> . "}}}"
 */
protected function add_tablerow_markers($preg_groups)
{
    // take the first group (in priority order) that carries content; when none
    // does, the value of the last group looked at is what gets stored
    $value = '';
    foreach (array('link', 'macro', 'code', 'image') as $group)
    {
        $value = isset($preg_groups[$group]) ? $preg_groups[$group] : '';
        if ($value)
        {
            break;
        }
    }
    $this->_tablerow_markup[] = $value;

    $this->_tablerow_count++;
    $token = chr(255) . $this->_tablerow_count;

    $this->_tablerow_markers[] = '/{{\\{' . $token . '\\}}}/';
    return '{{{' . $token . '}}}';
}
/**
 * Normalise raw markup before parsing and collect document-level data.
 *
 * Steps, in order (the order matters):
 *  1. pull the leading metadata line(s) out via preprocess_metadata();
 *  2. pad with newlines, strip comments and join continuation lines;
 *  3. set preformatted/code blocks aside, wrap raw URLs in link markup,
 *     then restore the blocks (so URLs inside them are left untouched);
 *  4. strip marker tags while recording each marker's offset
 *     into $this->_markerdata via preprocess_markerdata().
 *
 * The result is stored in $this->_preprocessed_raw and also returned.
 *
 * NOTE(review): the preg_* calls return NULL on a PCRE error and that is not
 * checked here — confirm whether large inputs can hit the backtrack limit.
 *
 * @param mixed $raw raw markup
 * @return mixed preprocessed markup
 */
579  protected function preprocess_raw_markup($raw)
580  {
581  # get metadata
582  $raw = preg_replace_callback('/\A```##(.*$(\n``.*$)*)/m',array($this,'preprocess_metadata'),$raw);
583  $raw = "\n".$raw."\n"; // in case there is comment on first line, lookahead on last
584  # remove comments
585 // $raw = preg_replace('/\n```.*$/m','',$raw);
586  $raw = preg_replace('/```.*$/m','',$raw); // allow comment starting anywhere in line
587  # remove line continuations
588  $raw = preg_replace('/\n``/','',$raw);
589  # set aside preformatted blocks
590  $raw = preg_replace_callback('/'.$this->_rules->pre .'|' .$this->_rules->code .'/xm',array($this,'add_pre_and_code_markers'),$raw);
591  # add markup around raw url's; this allows "//" emphasis markup to operate without constraint
592  $raw = preg_replace('/(^|\W)((?<!\[\[|{{|~)(http[s]?|mailto):\/\/\S+\w)/','$1[[$2]]',$raw); // add markup to raw url
593  # restore preformatted blocks
594  $raw = preg_replace($this->_pre_markers,$this->_pre_markup,$raw);
595  # get marker data and offsets
596  $markerdata = $this->_markerdata = new StdClass;
597  $markerdata->offset = 0;
598  $markerdata->markercount = 0;
599  $this->_markers = array();
600  // pull out markers {{##markername marker decoration##}}
601  $re = '/(?P<text>[^{]*)|(?<!~)(?P<marker>\{\{##(?P<markername>[a-zA-Z]\w*)(\s+(?P<decorator>'.$this->_argchars.'+?))?\s*##\}\})|(?P<char>.)/';
602  $raw = preg_replace_callback($re,array($this,'preprocess_markerdata'),$raw);
603  $markerdata->markers = $this->_markers;
 // the working list is only needed while the callback runs
604  $this->_markers = NULL;
 // the running offset has become the length of the text with markers removed
605  $markerdata->textlength = $markerdata->offset;
606  unset($markerdata->offset);
607  $this->_preprocessed_raw = $raw;
608  return $raw;
609  }
/**
 * preg_replace_callback() handler used while markers are stripped from the text.
 *
 * Plain text passes through unchanged and advances the running offset by its
 * byte length. A marker is removed from the text (it contributes '') and is
 * recorded in $this->_markers twice: appended to the list and stored under its
 * name.
 *
 * @param array $matches match array with optional 'text', 'char', 'marker',
 *                       'markername' and 'decorator' groups
 * @return string the text to keep in place of the match
 */
protected function preprocess_markerdata($matches)
{
    // absent groups count as empty
    foreach (array('text', 'char', 'marker', 'markername', 'decorator') as $group)
    {
        if (!isset($matches[$group]))
        {
            $matches[$group] = '';
        }
    }

    $kept = $matches['text'] . $matches['char'];
    $this->_markerdata->offset += strlen($kept);

    if (!$matches['marker'])
    {
        return $kept;
    }

    // a marker: note where it sat in the marker-free text
    $this->_markerdata->markercount++;
    $entry = new StdClass;
    $this->_markers[] = $entry;
    $entry->offset = $this->_markerdata->offset;
    $entry->name = $matches['markername'];
    $this->_markers[$entry->name] = $entry;
    $entry->decoration = $matches['decorator']
        ? $this->get_decoration($matches['decorator'])
        : NULL;

    return $kept;
}
/**
 * preg_replace_callback() handler for the leading metadata line(s).
 *
 * Parses the captured arguments as a decorator string, stores the result in
 * $this->_metadata and removes the metadata from the markup.
 *
 * @param array $matches match array; group 1 holds the metadata arguments
 * @return string always '', so the metadata is deleted from the text
 */
protected function preprocess_metadata($matches)
{
    // join continuation lines into one argument string
    $joined = preg_replace('/\n``/', '', trim($matches[1]));

    $this->_metadata = $this->get_decoration($joined);

    return '';
}
/**
 * Parse a decorator string into classes, properties and attributes.
 *
 * Each term is either a bare value (a class), "name:value" (a property) or
 * "name=value" (an attribute). Values may be double-quoted, single-quoted or
 * unquoted; for attributes the quote style is remembered in
 * attributedelimiters (unquoted values are recorded as double-quoted).
 *
 * Changes from the previous version:
 *  - attributedelimiters is always present (empty array when there are no
 *    attributes) instead of appearing only once an attribute is seen;
 *  - the quote style is detected by comparing with '' rather than by
 *    truthiness, so a quoted value of '0' keeps its own delimiter.
 *
 * @param string $decorator_string text of the decorator
 * @return StdClass object with array members classes, properties, attributes
 *                  and attributedelimiters
 */
public function get_decoration($decorator_string)
{
    $decoration = new StdClass;
    $decoration->classes = array();
    $decoration->properties = array();
    $decoration->attributes = array();
    $decoration->attributedelimiters = array();

    $terms = array();
    preg_match_all($this->decorator_re, $decorator_string, $terms, PREG_SET_ORDER); // returns terms
    foreach ($terms as $term)
    {
        // trailing groups that did not take part in the match are absent
        foreach (array('variable', 'operator', 'ddelim_value', 'sdelim_value', 'ndelim_value') as $group)
        {
            if (!isset($term[$group]))
            {
                $term[$group] = '';
            }
        }
        $variable = $term['variable'];
        $operator = $term['operator'];

        if ($term['ddelim_value'] !== '') $delimiter = '"';
        elseif ($term['sdelim_value'] !== '') $delimiter = "'";
        else $delimiter = '';

        // only one of the following will not be empty
        $value = $term['ddelim_value'] . $term['sdelim_value'] . $term['ndelim_value'];

        switch ($operator)
        {
            case '=':
                $decoration->attributes[$variable] = $value;
                if ($delimiter == '') $delimiter = '"';
                $decoration->attributedelimiters[$variable] = $delimiter;
                break;
            case ':':
                $decoration->properties[$variable] = $value;
                break;
            default:
                $decoration->classes[] = $value;
                break;
        }
    }
    return $decoration;
}
/**
 * Attach a parsed decoration to a node, keeping the original markup with it.
 *
 * @param object $node node that receives a "decoration" member
 * @param string $decorator_string decorator text as written in the markup
 */
protected function set_node_decoration($node,$decorator_string)
{
    $parsed = $this->get_decoration($decorator_string);
    $node->decoration = $parsed;
    // keep the source text alongside the parsed form
    $node->decoration->markup = $decorator_string;
}
707  #------------------------------------------------------------------------------#
708  #---------------------------[ utilities ]--------------------------------------#
709  #------------------------------------------------------------------------------#
710 
/**
 * Walk up from a node until one of the given types is reached.
 *
 * The starting node itself qualifies. The walk also stops at a node without a
 * parent, so that node is returned even if its type is not in the list.
 *
 * @param object $node node to start from (read: ->type, ->parent)
 * @param array $types node types to stop at
 * @return object the node the walk stopped on
 */
public function get_selected_ancestor($node, $types) // public as can be used by registered callbacks
{
    for (;;)
    {
        if (empty($node->parent))
        {
            break; // top of the tree
        }
        if (in_array($node->type,$types))
        {
            break; // found a node of a wanted type
        }
        $node = $node->parent;
    }
    return $node;
}
735  #=========================[ basic processing ]=================================#
736 
/**
 * Append plain characters to the current text leaf, creating the leaf under
 * the current node first when there is none.
 *
 * @param array $preg_groups match array; 'char' holds the matched text
 */
protected function _char_node($preg_groups) // can create text leaf node
{
    $chars = $preg_groups['char'];
    if ($this->_leaftextnode === NULL)
    {
        $this->_leaftextnode = new DocNode(DocNode::TEXT, $this->_curnode);
    }
    $this->_leaftextnode->textcontent .= $chars;
}
/**
 * Handle an escape sequence: the escaped character is added to the current
 * text leaf as literal text (the "~" itself is dropped). A leaf is created
 * under the current node first when there is none.
 *
 * @param array $preg_groups match array; 'escaped_char' holds the character
 */
protected function _escape_node($preg_groups)
{
    $literal = $preg_groups['escaped_char'];
    if ($this->_leaftextnode === NULL)
    {
        $this->_leaftextnode = new DocNode(DocNode::TEXT, $this->_curnode);
    }
    $this->_leaftextnode->textcontent .= $literal;
}
/**
 * Handle a blank line: move the current node up to an enclosing ancestor so
 * that the next content starts a new block.
 *
 * @param array $preg_groups match array (not used)
 */
770  protected function _blankline_node($preg_groups)
771  {
772  # triggers new block
773  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
 // NOTE(review): listing line 774 (the $types argument and the end of this call)
 // is missing from this extract — restore it from the upstream source
775  }
/**
 * Handle text that no other block rule claimed.
 *
 * If the current node is a document or block definition, a new paragraph is
 * opened (and decorated when a paragraph decorator was given). Otherwise the
 * text continues the current node and is prefixed with a space. The text is
 * then parsed for inline markup.
 *
 * @param array $preg_groups match array; text is in 'text_chars' or
 *                           'text_charstream', decorator in 'paragraph_decorator'
 */
790  protected function _paragraph_node($preg_groups) // can create paragraph for new text
791  {
792  # text not otherwise classified, triggers creation of paragraph for new set
793  isset($preg_groups['text_chars']) or ($preg_groups['text_chars'] = '');
794  isset($preg_groups['text_charstream']) or ($preg_groups['text_charstream'] = '');
795  isset($preg_groups['paragraph_decorator']) or ($preg_groups['paragraph_decorator'] = '');
 // only one of the two text groups is filled, so concatenation picks it
796  $text = $preg_groups['text_chars'] . $preg_groups['text_charstream'];
797  $decorator = $preg_groups['paragraph_decorator'];
798  if (in_array($this->_curnode->type,
799  array(
 // NOTE(review): listing lines 800-803 (further node types in this list) are
 // missing from this extract — restore them from the upstream source
804  DocNode::DEF_LIST))) // text cannot exist in these blocks
805  {
806  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
 // NOTE(review): listing line 807 (the $types argument and the end of this call)
 // is missing from this extract — restore it from the upstream source
808  }
809  if (in_array($this->_curnode->type, array(DocNode::DOCUMENT,DocNode::BLOCKDEF)))
810  {
811  $node = $this->_curnode = new DocNode(DocNode::PARAGRAPH, $this->_curnode);
812  if ($decorator) $this->set_node_decoration($node,$decorator);
813  } else {
 // continuation of an open block: separate from the previous line with a space
814  $text = ' ' . $text;
815  }
816  $this->_parse_inline($text);
817  $this->_leaftextnode = NULL;
818  }
819  #================================[ core markup ]===============================#
820  #--------------------------------[ basic markup ]------------------------------#
821 
/**
 * Handle a heading line.
 *
 * Creates a heading node whose level is the number of leading "=" characters,
 * parses the heading text for inline markup inside it, then restores the
 * current node to the heading's parent.
 *
 * @param array $preg_groups match array; uses 'heading_head', 'heading_text'
 *                           and 'heading_decorator'
 */
831  protected function _heading_node($preg_groups)
832  {
833  # headings
834  isset($preg_groups['heading_text']) or ($preg_groups['heading_text'] = '');
835  isset($preg_groups['heading_head']) or ($preg_groups['heading_head'] = '');
836  isset($preg_groups['heading_decorator']) or ($preg_groups['heading_decorator'] = '');
837  $headtext = $preg_groups['heading_text'];
838  $headhead = $preg_groups['heading_head'];
839  $decorator = $preg_groups['heading_decorator'];
840 
841  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
 // NOTE(review): listing line 842 (the $types argument and the end of this call)
 // is missing from this extract — restore it from the upstream source
843 
844  $node = new DocNode(DocNode::HEADING,$this->_curnode);
845  $node->level = strlen($headhead);
846  if ($decorator) $this->set_node_decoration($node,$decorator);
847 
 // parse the heading text with the heading as insertion point ...
848  $parent = $this->_curnode;
849  $this->_curnode = $node;
850  $this->_leaftextnode = NULL;
851 
852  $this->_parse_inline($headtext);
853 
 // ... then step back out so following blocks become siblings of the heading
854  $this->_curnode = $parent;
855  $this->_leaftextnode = NULL;
856  }
/**
 * Handle an emphasis marker, which acts as a toggle.
 *
 * Inside an emphasis node the marker closes it (the current node moves to the
 * parent, if there is one); anywhere else it opens a new emphasis node.
 *
 * @param array $preg_groups match array (not used)
 */
protected function _emph_node($preg_groups)
{
    if ($this->_curnode->type == DocNode::EMPHASIS)
    {
        // closing marker
        if (!empty($this->_curnode->parent))
        {
            $this->_curnode = $this->_curnode->parent;
        }
    }
    else
    {
        // opening marker
        $this->_curnode = new DocNode(DocNode::EMPHASIS, $this->_curnode);
    }
    // following characters must start a new text leaf
    $this->_leaftextnode = NULL;
}
/**
 * Handle a strong marker, which acts as a toggle.
 *
 * Inside a strong node the marker closes it (the current node moves to the
 * parent, if there is one); anywhere else it opens a new strong node.
 *
 * @param array $preg_groups match array (not used)
 */
protected function _strong_node($preg_groups)
{
    if ($this->_curnode->type == DocNode::STRONG)
    {
        // closing marker
        if (!empty($this->_curnode->parent))
        {
            $this->_curnode = $this->_curnode->parent;
        }
    }
    else
    {
        // opening marker
        $this->_curnode = new DocNode(DocNode::STRONG, $this->_curnode);
    }
    // following characters must start a new text leaf
    $this->_leaftextnode = NULL;
}
/**
 * Handle a forced line break: a line break node is created under the current
 * node, and text after it starts a new text leaf.
 *
 * @param array $preg_groups match array (not used)
 */
protected function _linebreak_node($preg_groups)
{
    // the node is created for its effect on the tree; no reference is kept here
    new DocNode(DocNode::LINEBREAK, $this->_curnode);

    $this->_leaftextnode = NULL;
}
/**
 * Handle a horizontal rule: move the current node up to an enclosing ancestor
 * and create a horizontal rule node under it.
 *
 * @param array $preg_groups match array (not used)
 */
912  protected function _horizontalrule_node($preg_groups)
913  {
914  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
 // NOTE(review): listing line 915 (the $types argument and the end of this call)
 // is missing from this extract — restore it from the upstream source
916  new DocNode(DocNode::HORIZONTALRULE, $this->_curnode);
917  }
/**
 * Handle all types of links.
 *
 * Creates a link node under the current node, records target, optional
 * decoration and title, and parses the caption inside the link with the
 * link-caption regex (which excludes nested links).
 *
 * @param array $preg_groups match array; uses 'link_target', 'link_text',
 *                           'link_title' and 'link_decorator'
 */
protected function _link_node($preg_groups)
{
    // absent groups count as '', and surrounding whitespace is dropped
    $field = function ($key) use ($preg_groups)
    {
        return trim(isset($preg_groups[$key]) ? $preg_groups[$key] : '');
    };
    $target = $field('link_target');
    $caption = $field('link_text');
    $title = $field('link_title');
    $decorator = $field('link_decorator');

    $linknode = new DocNode(DocNode::LINK, $this->_curnode);
    $linknode->target = $target;
    if ($decorator) $this->set_node_decoration($linknode,$decorator);
    if ($title) $linknode->title = $title;

    // parse the caption with the link as insertion point ...
    $outer = $this->_curnode;
    $this->_curnode = $linknode;
    $this->_leaftextnode = NULL;

    preg_replace_callback($this->link_inline_re, array($this,'_create_node'), $caption);

    // ... then restore the previous insertion point
    $this->_curnode = $outer;
    $this->_leaftextnode = NULL;
}
952  #--------------------------------[ images ]-------------------------------------#
953 
/**
 * Handle images included in the page.
 *
 * Creates an image node under the current node, records target, optional
 * decoration and title, and parses the caption inside the image with the
 * image-caption regex (which excludes nested images).
 *
 * @param array $preg_groups match array; uses 'image_target', 'image_text',
 *                           'image_title' and 'image_decorator'
 */
protected function _image_node($preg_groups)
{
    // absent groups count as '', and surrounding whitespace is dropped
    $field = function ($key) use ($preg_groups)
    {
        return trim(isset($preg_groups[$key]) ? $preg_groups[$key] : '');
    };
    $target = $field('image_target');
    $caption = $field('image_text');
    $title = $field('image_title');
    $decorator = $field('image_decorator');

    $imagenode = new DocNode(DocNode::IMAGE, $this->_curnode);
    $imagenode->target = $target;
    if ($decorator) $this->set_node_decoration($imagenode,$decorator);
    if ($title != '') $imagenode->title = $title;

    // parse the caption with the image as insertion point ...
    $outer = $this->_curnode;
    $this->_curnode = $imagenode;
    $this->_leaftextnode = NULL;

    preg_replace_callback($this->image_inline_re, array($this,'_create_node'), $caption);

    // ... then restore the previous insertion point
    $this->_curnode = $outer;
    $this->_leaftextnode = NULL;
}
987  #--------------------------------[ lists ]-------------------------------------#
988 
/**
 * Handle a collected run of list lines by re-scanning it item by item; each
 * item match is routed through _create_node().
 *
 * @param array $preg_groups match array; 'list' holds the whole list markup
 */
protected function _list_node($preg_groups)
{
    $dispatch = array($this, '_create_node');
    // the regex engine only drives the iteration; its result is discarded
    preg_replace_callback($this->item_inline_re, $dispatch, $preg_groups['list']);
}
/**
 * Handle one list item.
 *
 * The first marker character selects ordered ("#") or unordered list, and the
 * number of marker characters is the nesting level. The item is added to an
 * existing list of the same level found among the ancestors when that list has
 * the same type; otherwise a new list node is created. The item text is then
 * parsed for inline markup.
 *
 * @param array $preg_groups match array; uses 'item_head', 'item_text',
 *                           'list_decorator' and 'item_decorator'
 */
1008  protected function _item_node($preg_groups)
1009  {
1010  # list item
1011  isset($preg_groups['item_head']) or ($preg_groups['item_head'] = '');
1012  isset($preg_groups['item_text']) or ($preg_groups['item_text'] = '');
1013  isset($preg_groups['list_decorator']) or ($preg_groups['list_decorator'] = '');
1014  isset($preg_groups['item_decorator']) or ($preg_groups['item_decorator'] = '');
1015  $bullet = $preg_groups['item_head'];
1016  $text = $preg_groups['item_text'];
1017  $listdecorator = $preg_groups['list_decorator'];
1018  $itemdecorator = $preg_groups['item_decorator'];
1019  // determine the type of list being processed
 // NOTE(review): curly-brace string offsets ($bullet{0}) are deprecated in PHP 7.4
 // and removed in PHP 8.0; $bullet[0] is the supported spelling
1020  if ($bullet{0} == '#')
1021  $listtype = DocNode::ORDERED_LIST;
1022  else
1023  $listtype = DocNode::UNORDERED_LIST;
1024  // determine the level by measuring the number of list markup characters
1025  $level = strlen($bullet);
1026  # Find a node of the same type and level up the tree, or a block to start a list
1027  $candidate_node = $this->_curnode;
1028  while // find a reference node if current list doesn't match, and if we're not in a block node to start
1029  (
1030  ($candidate_node) // searching an existing node
1031  and ! // this is a not a list of the same level...
1032  (
1033  in_array($candidate_node->type, array(DocNode::ORDERED_LIST, DocNode::UNORDERED_LIST))
1034  and $candidate_node->level == $level
1035  )
1036  and ! // ... and this is not a block ...
1037  (
1038  in_array($candidate_node->type, array(DocNode::DOCUMENT,DocNode::BLOCKDEF))
1039  )
1040  ) // ... so keep looking.
1041  {
1042  $candidate_node = $candidate_node->parent;
1043  }
1044  # set the found list as the current node for the list item...
1045  # (if $candidate_node is null then no reference candidate was found)
1046  if ($candidate_node and ($candidate_node->type == $listtype)) // found a match for list
1047  $this->_curnode = $candidate_node;
1048  else # ... or create a new level of list
1049  {
1050  // get the nearest ancestor candidate for creating a new list
1051  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
 // NOTE(review): listing line 1052 (the $types argument and the end of this call)
 // is missing from this extract — restore it from the upstream source
1053  // create the list
1054  $listnode = $this->_curnode = new DocNode($listtype, $this->_curnode);
1055  if ($listdecorator) $this->set_node_decoration($listnode,$listdecorator);
1056  $listnode->level = $level;
1057  }
1058  # now add the list item to the list
1059  $itemnode = $this->_curnode = new DocNode(DocNode::LIST_ITEM, $this->_curnode);
1060  if ($itemdecorator) $this->set_node_decoration($itemnode,$itemdecorator);
1061  $this->_leaftextnode = NULL;
1062  # parse the text of the list item
1063  $this->_parse_inline($text);
1064  $this->_leaftextnode = NULL;
1065  }
1066  #--------------------------------[ definition list ]-------------------------------------#
1067 
/**
 * Handle a collected run of definition list lines by re-scanning it item by
 * item; each item match is routed through _create_node().
 *
 * @param array $preg_groups match array; 'deflist' holds the whole list markup
 */
protected function _deflist_node($preg_groups)
{
    $dispatch = array($this, '_create_node');
    // the regex engine only drives the iteration; its result is discarded
    preg_replace_callback($this->defitem_inline_re, $dispatch, $preg_groups['deflist']);
}
/**
 * Handle one definition list item.
 *
 * The number of ":" characters is the nesting level. The item is added to an
 * existing definition list of the same level found among the ancestors, or a
 * new definition list node is created. A term node is added only when the item
 * has term text; a description node is always added. Both texts are parsed for
 * inline markup.
 *
 * @param array $preg_groups match array; uses 'defitem_head', 'defterm_text',
 *                           'defdesc_text' and the three *_decorator groups
 */
1088  protected function _defitem_node($preg_groups)
1089  {
1090  # list item
1091  isset($preg_groups['defitem_head']) or ($preg_groups['defitem_head'] = '');
1092  isset($preg_groups['defterm_text']) or ($preg_groups['defterm_text'] = '');
1093  isset($preg_groups['defdesc_text']) or ($preg_groups['defdesc_text'] = '');
1094  isset($preg_groups['deflist_decorator']) or ($preg_groups['deflist_decorator'] = '');
1095  isset($preg_groups['defterm_decorator']) or ($preg_groups['defterm_decorator'] = '');
1096  isset($preg_groups['defdesc_decorator']) or ($preg_groups['defdesc_decorator'] = '');
1097  $head = $preg_groups['defitem_head'];
1098  $term = trim($preg_groups['defterm_text']);
1099  $desc = $preg_groups['defdesc_text'];
1100  $listdecorator = $preg_groups['deflist_decorator'];
1101  $termdecorator = $preg_groups['defterm_decorator'];
1102  $descdecorator = $preg_groups['defdesc_decorator'];
1103  // set the type of list being processed
1104  $listtype = DocNode::DEF_LIST;
1105  // determine the level by measuring the number of list markup characters
1106  $level = strlen($head);
1107  # Find a node of the same type and level up the tree, or a block to start a list
1108  $candidate_node = $this->_curnode;
1109  while // find a reference node if current list doesn't match, and if we're not in a block node to start
1110  (
1111  ($candidate_node) // searching an existing node
1112  and ! // this is a not a list of the same level...
1113  (
1114  ($candidate_node->type == DocNode::DEF_LIST)
1115  and $candidate_node->level == $level
1116  )
1117  and ! // ... and this is not a block ...
1118  (
1119  in_array($candidate_node->type, array(DocNode::DOCUMENT,DocNode::BLOCKDEF))
1120  )
1121  ) // ... so keep looking.
1122  $candidate_node = $candidate_node->parent;
1123  # set the found list as the current node for the list item...
1124  # (if $candidate_node is null then no reference candidate was found)
1125  if ($candidate_node and ($candidate_node->type == $listtype)) // found a match for list
1126  $this->_curnode = $candidate_node;
1127  else # ... or create a new level of list
1128  {
1129  // get the nearest ancestor candidate for creating a new list
1130  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
 // NOTE(review): listing line 1131 (the $types argument and the end of this call)
 // is missing from this extract — restore it from the upstream source
1132  // create the list
1133  $listnode = $this->_curnode = new DocNode($listtype, $this->_curnode);
1134  if ($listdecorator) $this->set_node_decoration($listnode,$listdecorator);
1135  $listnode->level = $level;
1136  }
1137  # now add the term to the list, if present
1138  if ($term)
1139  {
 // remember the list node so the description becomes a sibling of the term
1140  $curnode = $this->_curnode;
1141  $termnode = $this->_curnode = new DocNode(DocNode::DEF_TERM, $this->_curnode);
1142  if ($termdecorator) $this->set_node_decoration($termnode,$termdecorator);
1143  $this->_leaftextnode = NULL;
1144  # parse the text of the term
1145  $this->_parse_inline($term);
1146  $this->_leaftextnode = NULL;
1147  $this->_curnode = $curnode;
1148  }
1149  # ...and add the desc to the list
1150  $descnode = $this->_curnode = new DocNode(DocNode::DEF_DESC, $this->_curnode);
1151  if ($descdecorator) $this->set_node_decoration($descnode,$descdecorator);
1152  $this->_leaftextnode = NULL;
1153  # parse the text of the desc
1154  $this->_parse_inline($desc);
1155  $this->_leaftextnode = NULL;
1156  }
1157  #--------------------------------[ tables ]-------------------------------------#
1158 
/**
 * Builds the nodes for one table row (any line beginning with '|').
 *
 * Reuses the table node found by get_selected_ancestor(), or creates a new
 * TABLE node under the ancestor it returns, then appends a TABLE_ROW holding
 * one TABLE_HEADCELL or TABLE_CELL per match of $this->cell_re. Each cell's
 * markup is run through $this->inline_re with _create_node() as callback.
 * On return $this->_curnode is the table node, $this->_leaftextnode is NULL
 * and the row set-aside structures are reset.
 *
 * @param array $preg_groups named regex groups; 'table_row',
 *   'table_decorator' and 'row_decorator' are read, each defaulting to ''
 * @return void
 */
protected function _table_node($preg_groups)
{
    # process a table row (any line beginning with '|')
    isset($preg_groups['table_row']) or ($preg_groups['table_row'] = '');
    isset($preg_groups['table_decorator']) or ($preg_groups['table_decorator'] = '');
    // fix: the default used to be stored under the misspelled key
    // 'row_decoratpor', which left 'row_decorator' undefined when it is read below
    isset($preg_groups['row_decorator']) or ($preg_groups['row_decorator'] = '');
    $rowmarkup = trim($preg_groups['table_row']);
    # set aside rowmarkup links, preformats, macros and images to simplify markup
    $rowmarkup = preg_replace_callback(
        $this->tablerow_setaside_re, array($this,'add_tablerow_markers'),$rowmarkup);
    # assure at least content of a space in every cell.
    $rowmarkup = preg_replace('/((?<!:)\|(?=\|))/','| ',$rowmarkup); // ensure content for every cell
    $tabledecorator = trim($preg_groups['table_decorator']);
    $rowdecorator = trim($preg_groups['row_decorator']);

    # set reference node to nearest table, document, or block ancestor
    // NOTE(review): the type list below was rebuilt from the comment above
    // because the line was missing in the reviewed listing -- confirm against the original file
    $this->_curnode = $this->get_selected_ancestor($this->_curnode,
        array(DocNode::TABLE, DocNode::DOCUMENT, DocNode::BLOCKDEF));
    # create new table node if necessary
    if ($this->_curnode->type != DocNode::TABLE)
        $this->_curnode = new DocNode(DocNode::TABLE, $this->_curnode);
    # set decoration for table
    $tablenode = $this->_curnode;
    if ($tabledecorator) $this->set_node_decoration($tablenode,$tabledecorator);

    # create a new row node
    $row_node = $this->_curnode = new DocNode(DocNode::TABLE_ROW, $tablenode);
    # add decoration to new row
    if ($rowdecorator) $this->set_node_decoration($row_node,$rowdecorator);

    # collect all cell markup into $cell_matches
    preg_match_all($this->cell_re, $rowmarkup, $cell_matches, PREG_SET_ORDER);
    # process cell_matches
    $this->_leaftextnode = NULL;
    foreach ($cell_matches as $cell_groups) {
        # get cell markup
        isset($cell_groups['cell']) or ($cell_groups['cell'] = '');
        isset($cell_groups['head']) or ($cell_groups['head'] = '');
        isset($cell_groups['cell_decorator']) or ($cell_groups['cell_decorator'] = '');
        $cellmarkup = $cell_groups['cell'];
        $cellhead = $cell_groups['head'];
        $celldecorator = $cell_groups['cell_decorator'];
        # create table header cell or table data cell
        if ($cellhead) {
            // a header cell takes its markup from the 'head' group, minus surrounding '='
            $cellmarkup = trim($cellhead,'=');
            $cell_node = $this->_curnode = new DocNode(DocNode::TABLE_HEADCELL, $row_node);
        } else {
            $cell_node = $this->_curnode = new DocNode(DocNode::TABLE_CELL, $row_node);
        }
        # apply decoration to cell node
        if ($celldecorator) $this->set_node_decoration($cell_node,$celldecorator);
        # restore links, preformats, macros and images to current cell
        $cellmarkup = preg_replace($this->_tablerow_markers,$this->_tablerow_markup,$cellmarkup);
        # process cell inline markup
        $this->_leaftextnode = NULL;
        preg_replace_callback($this->inline_re, array($this,'_create_node'), $cellmarkup);
    }
    # set reference back to table node
    $this->_curnode = $tablenode;
    $this->_leaftextnode = NULL;
    # reset table setaside structure
    $this->_tablerow_markers = array();
    $this->_tablerow_markup = array();
    $this->_tablerow_count = 0;
}
1234  #================================[ special decorators ]=============================#
1235  #--------------------------------[ span decoration ]--------------------------------#
1236 
/**
 * Handles span markup: opens a span, closes one, or falls back to plain text.
 *
 * - With a non-empty 'span_decorator': creates a SPAN node under the current
 *   node, makes it current and applies the decorator.
 * - Without a decorator while the current node is a SPAN: moves the current
 *   node to its parent (when it has one).
 * - Otherwise: appends the matched 'span' text to the leaf text node,
 *   creating that TEXT node under the current node first if none is pending.
 *
 * @param array $preg_groups named regex groups; reads 'span_decorator'
 *   (treated as '' when not set) and, in the fallback case, 'span'
 * @return void
 */
protected function _span_node($preg_groups)
{
    $spandecorator = isset($preg_groups['span_decorator'])
        ? $preg_groups['span_decorator']
        : '';

    # opening markup: start a new span below the current node
    if ($spandecorator) {
        $spannode = new DocNode(DocNode::SPAN, $this->_curnode);
        $this->_curnode = $spannode;
        $this->set_node_decoration($spannode, $spandecorator);
        $this->_leaftextnode = NULL;
        return;
    }

    # closing markup: step out of the span that is currently open
    if ($this->_curnode->type == DocNode::SPAN) {
        if (!empty($this->_curnode->parent)) {
            $this->_curnode = $this->_curnode->parent;
            $this->_leaftextnode = NULL;
        }
        return;
    }

    # neither opening nor closing: keep the markup as literal text
    if (is_null($this->_leaftextnode)) {
        $this->_leaftextnode = new DocNode(DocNode::TEXT, $this->_curnode);
    }
    $this->_leaftextnode->textcontent .= $preg_groups['span'];
}
1271  #--------------------------------[ block dividers ]--------------------------------#
1272 
/**
 * Adds an empty BLOCKDIVIDER node that acts as a divider between blocks.
 *
 * Moves $this->_curnode to the ancestor returned by get_selected_ancestor()
 * and attaches the new node to it; the divider itself is not made current.
 *
 * @param array $preg_groups named regex groups; 'blockdivider_decorator' is
 *   read directly, without the isset() default used by the sibling handlers
 * @return void
 */
1280  protected function _blockdivider_node($preg_groups)
1281  {
1282  # empty block acting as block divider
1283  $decorator = $preg_groups['blockdivider_decorator'];
1284  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
// NOTE(review): the continuation of the call above (its second argument and
// closing parenthesis) is absent from this listing -- restore it from the original file
1286  $node = new DocNode(DocNode::BLOCKDIVIDER, $this->_curnode);
// decoration is applied only when a non-empty decorator string was matched
1287  if ($decorator) $this->set_node_decoration($node,$decorator);
1288  }
1289  #============================[ preformatted text ]=================================#
1290 
/**
 * Creates a CODE node for preformatted inline text.
 *
 * The node is attached to the current node, which stays current. Its
 * textcontent is the matched text; a non-empty (trimmed) decorator is
 * applied through set_node_decoration(). $this->_leaftextnode is reset
 * to NULL afterwards.
 *
 * @param array $preg_groups named regex groups; 'code_text' and
 *   'code_decorator' are read, each treated as '' when not set
 * @return void
 */
protected function _code_node($preg_groups)
{
    $rawtext = isset($preg_groups['code_text']) ? $preg_groups['code_text'] : '';
    $rawdecorator = isset($preg_groups['code_decorator']) ? $preg_groups['code_decorator'] : '';
    $codedecorator = trim($rawdecorator);

    $codenode = new DocNode(DocNode::CODE, $this->_curnode);
    $codenode->textcontent = $rawtext;
    if ($codedecorator) {
        $this->set_node_decoration($codenode, $codedecorator);
    }

    $this->_leaftextnode = NULL;
}
/**
 * Creates a PREFORMATTED node from a preformatted-text match.
 *
 * Moves $this->_curnode to the ancestor returned by get_selected_ancestor(),
 * rewrites the markup with $this->pre_escape_re (callback remove_tilde()),
 * and stores the result as the node's textcontent. The node's section is the
 * matched type, or '' when the type is empty. $this->_leaftextnode is reset
 * to NULL afterwards.
 *
 * @param array $preg_groups named regex groups; 'pre_type', 'pre_markup' and
 *   'pre_decorator' are read, each defaulting to ''
 * @return void
 */
1317  protected function _pre_node($preg_groups)
1318  {
1319  # process preformatted text
1320  isset($preg_groups['pre_type']) or ($preg_groups['pre_type'] = '');
1321  isset($preg_groups['pre_markup']) or ($preg_groups['pre_markup'] = '');
1322  isset($preg_groups['pre_decorator']) or ($preg_groups['pre_decorator'] = '');
1323  $type = $preg_groups['pre_type'];
1324  $text = $preg_groups['pre_markup'];
1325  $decorator = $preg_groups['pre_decorator'];
1326 
1327  $this->_curnode = $this->get_selected_ancestor($this->_curnode,
// NOTE(review): the continuation of the call above (its second argument and
// closing parenthesis) is absent from this listing -- restore it from the original file
// each pre_escape_re match is replaced by its 'indent' and 'rest' groups (see remove_tilde)
1329  $text = preg_replace_callback($this->pre_escape_re,array($this,'remove_tilde'), $text);
1330  $node = new DocNode(DocNode::PREFORMATTED, $this->_curnode);
1331  $node->textcontent = $text;
// a falsy type (including '') is stored as the empty string
1332  $node->section = $type?$type:'';
1333  if ($decorator) $this->set_node_decoration($node,$decorator);
1334  $this->_leaftextnode = NULL;
1335  }
/**
 * preg_replace_callback() handler used by _pre_node() with pre_escape_re.
 *
 * Rebuilds a match from its 'indent' and 'rest' groups only, so anything
 * the pattern matched outside those two groups is dropped.
 *
 * @param array $preg_groups named groups of one match; reads 'indent' and 'rest'
 * @return string the 'indent' group followed by the 'rest' group
 */
private function remove_tilde($preg_groups)
{
    $leading = $preg_groups['indent'];
    $remainder = $preg_groups['rest'];

    return $leading . $remainder;
}
1343  #================================[ advanced markup ]===============================#
1344  #--------------------------------[ block declarations ]------------------------------#
1345 
/**
 * Creates a BLOCKDEF node for a block definition and parses its contents.
 *
 * The block is attached to the ancestor returned by get_selected_ancestor()
 * and made current while its inline text (via _parse_inline()) and then its
 * block content (via _parse_block()) are parsed. Afterwards $this->_curnode
 * is set back to that ancestor and $this->_leaftextnode is NULL.
 *
 * @param array $preg_groups named regex groups; 'block_selector',
 *   'block_content', 'block_decorator' and 'block_inline' are read,
 *   each defaulting to ''
 * @return void
 */
1353  protected function _blockdef_node($preg_groups)
1354  {
1355  # block definitions
1356  isset($preg_groups['block_selector']) or ($preg_groups['block_selector'] = '');
1357  isset($preg_groups['block_content']) or ($preg_groups['block_content'] = '');
1358  isset($preg_groups['block_decorator']) or ($preg_groups['block_decorator'] = '');
1359  isset($preg_groups['block_inline']) or ($preg_groups['block_inline'] = '');
1360  $name = $preg_groups['block_selector'];
1361  $content = $preg_groups['block_content'];
1362  $decorator = $preg_groups['block_decorator'];
1363  $inline = $preg_groups['block_inline'];
1364 
1365  $container = $this->_curnode = $this->get_selected_ancestor($this->_curnode,
1366  array(DocNode::DOCUMENT,
// NOTE(review): the remaining entries of this type list and the closing of the
// call are absent from this listing -- restore them from the original file
1370  $node = $this->_curnode = new DocNode(DocNode::BLOCKDEF, $container);
// the selector matched in the markup is stored as the node's blocktag
1371  $node->blocktag = $name;
1372  if ($decorator) $this->set_node_decoration($node,$decorator);
1373 
// the leaf text node is reset before and after each parsing step
1374  $this->_leaftextnode = NULL;
1375  if ($inline) $this->_parse_inline($inline);
1376  $this->_leaftextnode = NULL;
1377  if ($content) $this->_parse_block($content);
// leave the block: the container becomes the current node again
1378  $this->_curnode = $container;
1379  $this->_leaftextnode = NULL;
1380 
1381  }
1382  #-----------------------------------[ macros ]-------------------------------------#
1383 
/**
 * Handles an inline macro written in the placeholder syntax.
 *
 * Creates a MACRO node under the current node, records the macro name and,
 * when present, applies the arguments as decoration. Non-empty (trimmed)
 * macro text is stored as the node's textcontent and also parsed with
 * _parse_inline() while the macro node is current. The current node is the
 * same on return as on entry, and $this->_leaftextnode is NULL.
 *
 * @param array $preg_groups named regex groups; 'macro_name', 'macro_text'
 *   and 'macro_args' are read, each defaulting to ''
 * @return void
 */
protected function _macro_node($preg_groups)
{
    # default every group this handler reads
    foreach (array('macro_name', 'macro_text', 'macro_args') as $groupkey) {
        if (!isset($preg_groups[$groupkey])) {
            $preg_groups[$groupkey] = '';
        }
    }
    $macroname = $preg_groups['macro_name'];
    $macrotext = trim($preg_groups['macro_text']);
    $macroargs = $preg_groups['macro_args'];

    $parentnode = $this->_curnode;
    $macronode = new DocNode(DocNode::MACRO, $parentnode);
    $macronode->macroname = $macroname;
    if ($macroargs) {
        $this->set_node_decoration($macronode, $macroargs);
    }

    if ($macrotext) {
        $macronode->textcontent = $macrotext;
        # parse the macro text with the macro node as current node
        $this->_curnode = $macronode;
        $this->_leaftextnode = NULL;
        $this->_parse_inline($macrotext);
        $this->_curnode = $parentnode;
    }

    $this->_leaftextnode = NULL;
}
/**
 * Handles a macro written in the placeholder syntax, block version.
 *
 * Unlike _macro_node(), the MACRO node is attached to the ancestor that
 * get_selected_ancestor() returns for the DOCUMENT and BLOCKDEF types, and
 * that ancestor becomes the current node. Non-empty (trimmed) macro text is
 * stored as the node's textcontent and also parsed with _parse_inline()
 * while the macro node is current. $this->_leaftextnode is NULL on return.
 *
 * @param array $preg_groups named regex groups; 'blockmacro_name',
 *   'blockmacro_text' and 'blockmacro_args' are read, each defaulting to ''
 * @return void
 */
protected function _blockmacro_node($preg_groups)
{
    # default every group this handler reads
    foreach (array('blockmacro_name', 'blockmacro_text', 'blockmacro_args') as $groupkey) {
        if (!isset($preg_groups[$groupkey])) {
            $preg_groups[$groupkey] = '';
        }
    }
    $macroname = $preg_groups['blockmacro_name'];
    $macrotext = trim($preg_groups['blockmacro_text']);
    $macroargs = $preg_groups['blockmacro_args'];

    # block macros hang off the selected ancestor, not the current node
    $anchornode = $this->get_selected_ancestor(
        $this->_curnode,
        array(DocNode::DOCUMENT,DocNode::BLOCKDEF)
    );
    $this->_curnode = $anchornode;

    $macronode = new DocNode(DocNode::MACRO, $anchornode);
    $macronode->macroname = $macroname;
    if ($macroargs) {
        $this->set_node_decoration($macronode, $macroargs);
    }

    if ($macrotext) {
        $macronode->textcontent = $macrotext;
        # parse the macro text with the macro node as current node
        $this->_curnode = $macronode;
        $this->_leaftextnode = NULL;
        $this->_parse_inline($macrotext);
        $this->_curnode = $anchornode;
    }

    $this->_leaftextnode = NULL;
}
1454  #------------------------------------------------------------------------------#
1455  #---------------------------[ debug functions ]--------------------------------#
1456  #------------------------------------------------------------------------------#
1457 
/**
 * Debug helper: echoes a label and var_dump()s each compiled regular
 * expression held by the parser, in a fixed order.
 *
 * @return void
 */
public function display_regex() // for debug
{
    // label => property name, in output order; each label keeps its trailing space
    $dump_order = array(
        'BLOCK_RE ' => 'block_re',
        'INLINE_RE ' => 'inline_re',
        'link_inline_re ' => 'link_inline_re',
        // fix: this dump was previously labelled 'item_inline_re ', duplicating the next label
        'image_inline_re ' => 'image_inline_re',
        'item_inline_re ' => 'item_inline_re',
        'defitem_inline_re ' => 'defitem_inline_re',
        'CELL_RE ' => 'cell_re',
        'PRE_ESCAPE_RE ' => 'pre_escape_re',
        'DECORATOR_RE ' => 'decorator_re',
        'TABLEROW_SETASIDE_RE ' => 'tablerow_setaside_re',
    );
    foreach ($dump_order as $label => $property) {
        echo $label;
        // the property is read here, inside the loop, so output order matches the list
        var_dump($this->$property);
    }
}
/**
 * Debug helper: prints the node tree below $root with print_r().
 *
 * The tree is first converted to nested arrays by display_dom_add_child().
 *
 * @param object $root node to start from; handed to display_dom_add_child()
 * @return int 1 plus the count reported by display_dom_add_child() for $root
 */
public function display_dom($root) // for debug
{
    $collected = array();
    $total = 1 + $this->display_dom_add_child($root, $collected);
    # the helper appended exactly one entry: the array form of $root
    print_r($collected[0]);
    return $total;
}
/**
 * Debug helper: appends the array form of $node, with its children nested
 * under the 'children' key, to $childarray.
 *
 * @param object $node node providing get_display_list() and a children property
 * @param array $childarray list to append to (passed by reference)
 * @return int number of nodes below $node (its children plus theirs)
 */
protected function display_dom_add_child($node,&$childarray) // for debug
{
    $nodearray = $node->get_display_list();
    $children = $node->children;
    $count = 0;
    if (!empty($children))
    {
        $nodearray['children'] = array();
        foreach ($children as $child)
            $count += $this->display_dom_add_child($child,$nodearray['children']);
        // fix: count the direct children only inside the guard, so count() is never
        // called on an empty non-countable value (a TypeError in PHP 8)
        $count += count($children);
    }
    $childarray[] = $nodearray;
    return $count;
}
1511 }