roxen.lists.pike.general

Subject Author Date
Appendix (SGML.pike) to: submit modify of Parser.SGML PeterPan <zenothing[at]hotmail[dot]com> 04-04-2009
//
// $Id: SGML.pike,v 1.3 2008/06/28 16:36:55 nilsson Exp $

#pike __REAL_VERSION__

class SGML
//!  	This is a handy simple parser of SGML-like
//!	syntax like HTML. It doesn't do anything advanced,
//!	but finding the corresponding end-tags.
//!	
//!	It's used like this:
//! @code
//! array res=Parser.SGML()->feed(string)->finish()->result();
//! @endcode
//!
//!	The resulting structure is an array of atoms,
//!	where the atom can be a string or a tag.
//!	A tag contains a similar array, as data. 
//!     
//! @example
//!	A string
//!    
//!     @tt{"<gat><gurka> </gurka><banan> <kiwi> </gat>"@}
//!     results in 
//!@code
//!({
//!    tag "gat" object with data:
//!    ({
//!        tag "gurka" object with data:
//!	({
//!            " "
//!        })
//!        tag "banan" object with data:
//!	({
//!            " "
//!            tag "kiwi" object with data:
//!	    ({
//!               " "
//!            })
//!        })
//!    })
//!})
//!@endcode
//!             
//!	ie, simple "tags" (not containers) are not detected,
//!	but containers are ended implicitly by a surrounding
//!	container _with_ an end tag.
//!
//! 	The 'tag' is an object with the following variables:
//!	@pre{
//!	 string name;           - name of tag
//!	 mapping args;          - argument to tag
//!	 int line,char,column;  - position of tag
//!	 int eline,echar,ecolumn;  - end position of tag; src[char..echar-1] is the
//!	                             block (added by Xuesong Guo)
//!	 string file;           - filename (see <ref>create</ref>)
//!	 array(SGMLatom) data;  - contained data
//!	 int open;		- set if the tag has no end tag (added by Xuesong Guo)
//!     @}
//!
{
  //! File name given to @[create()]. The visible code only copies it
  //! onto every tag it creates; it is never opened or read here.
   string file;

   //!
   class SGMLatom
   {
      //! Tag name, as stored by the enclosing parser.
      string name;
      //! Tag arguments (argument name to value). Stays 0 until assigned.
      mapping args;
      //! Position reported by the parser when the tag was created.
      int line,char,column;
      //! Position reported by the parser at the end tag that closed this tag.
      int eline,echar,ecolumn;
      //! File name copied from the enclosing parser object.
      string file;
      //! Content of the tag: strings and nested tags.
      array(SGMLatom) data=({});
      //! Non-zero while the tag has neither been written as @tt{<x/>@}
      //! nor been matched by its own end tag.
      int open;

      // Pretty-printer used by sprintf("%O", atom). Any other format
      // character yields 0, which leaves the formatting to sprintf itself.
      protected string _sprintf(int t, mapping m)
      {
         if (t!='O')
            return 0;

         string res=name;
         // args is 0 on a freshly created atom; sizeof(0) would throw, so
         // an unset mapping is treated exactly like an empty one.
         if (args)
            foreach ( (array)args, [string key,string val])
               res+=sprintf(" %s=%O",key,val);

         res="<"+res+(open?">":"/>");

         // Children are printed one per line, shifted by the indentation
         // that sprintf passed in.
         string pad=" "*(m->indent);
         if (data)
            foreach (data,SGMLatom a)
               res+=replace(sprintf("\n%O",a),
                            "\n","\n"+pad);

         return "SGMLatom("+res+")";
      }
   }

   // Stack of content frames, innermost first. res[k] collects the content
   // of tagstack[k]; the last frame holds the top-level content. It starts
   // with one empty frame and is always one entry longer than tagstack,
   // because tags and frames are pushed and popped together.
   protected array(array(SGMLatom|string)) res=({({})});
   // Tags pushed by got_tag() and not yet popped, innermost first.
   protected array(SGMLatom) tagstack=({});
   // Not referenced anywhere in the code visible here.
   protected array(object) errors;

   //! The parse result. Assigned by @[finish()], returned by @[result()].
   array(SGMLatom|string) data;

   // Tag callback installed on the underlying parser by create().
   //
   // g is the parser object, queried for the tag name, arguments and
   // position. s is the second callback argument; it is only checked for a
   // trailing "/>" to decide whether the tag is self-closed.
   //
   // An end tag that matches a tag on the stack closes that tag and every
   // tag opened after it. Anything else (including an end tag without a
   // match) becomes a new SGMLatom that is attached to the innermost tag
   // still marked open and then pushed on the stack itself.
   //
   // Always returns ({}).
   protected private array(string) got_tag(object g,string s)
   {
      string name=name_formater?name_formater(g->tag_name()):g->tag_name();

      if (name!="" && name[0]=='/')
      {
         int i=search(tagstack->name,name[1..]);
         if (i!=-1)
         {
            // Only the matched tag really got an end tag.
            tagstack[i]->open=0;
            // Pop the matched tag together with the i tags above it,
            // handing each one the content frame collected for it.
            i++;
            while (i--)
            {
               SGMLatom t=tagstack[0];
               t->data=res[0];
               [t->eline,t->echar,t->ecolumn]=g->at();
               res=res[1..];
               tagstack=tagstack[1..];
            }
            return ({});
         }
      }

      SGMLatom t=SGMLatom();
      t->name=name;
      if (argname_formater)
      {
         // Rebuild the argument mapping with every key passed through the
         // formatter. tag_args must be called; iterating the function
         // value itself is an error.
         t->args=([]);
         foreach (g->tag_args();string k;mixed d)
            t->args[argname_formater(k)]=d;
      }
      else
         t->args=g->tag_args();
      [t->line,t->char,t->column]=g->at();
      t->file=file;
      if (!has_suffix(s,"/>"))
         t->open=1;

      // Attach the tag to the innermost frame whose tag is still open.
      // res has one frame more than tagstack, so the scan ends at the
      // top-level frame at the latest.
      for (int i=0;i<sizeof(res);i++)
      {
         if (i>=sizeof(tagstack)||tagstack[i]->open)
         {
            res[i]+=({t});
            break;
         }
      }

      // Push the tag together with an empty content frame of its own.
      tagstack=({t})+tagstack;
      res=({({})})+res;

      return ({}); // don't care
   }

   //! Write an indented dump of @[arr] using @expr{write()@}; when
   //! @[arr] is not given, the parse result is dumped. Every nesting
   //! level adds two columns to @[level].
   void debug(array|void arr,void|int level)
   {
      int indent=level+2;
      array items=arr||data;

      foreach (items,string|SGMLatom item)
      {
         if (!stringp(item))
         {
            // A tag: its name, then its arguments if any, then its content
            // one level deeper.
            write("%*stag %O\n",indent,"",item->name);
            if (sizeof(item->args))
               write("%*s%-=*s\n",indent+4,"",75-indent,
                     sprintf("%O",item->args));
            debug(item->data,indent);
            continue;
         }

         // Plain text.
         write("%*s%-=*s\n",indent,"",79-indent,sprintf("%O",item));
      }
   }


   // Underlying parser object (HTML from the same module). create()
   // installs the tag and data callbacks on it; feed() and finish()
   // forward to it.
   private protected object p=.HTML();

   //! @decl void create()
   //! @decl void create(string filename, function|void name_formater, function|void argname_formater)
   //!	This object is created with this filename.
   //!	It's passed to all created tags, for debug and trace purposes.
   //!	When given, every tag name is replaced by name_formater(name)
   //!	and every argument name by argname_formater(arg_name).
   //! @note
   //! 	No, it doesn't read the file itself. See @[feed()].

   // Not used by any code visible in this class.
   protected int i;

   // Optional formatters set by create(); 0 means names are kept as parsed.
   function name_formater;
   function argname_formater;
   void create(void|string _file,function|void _name_formater,function|void _argname_formater)
   {
      file=_file;
      if (_name_formater)
         name_formater=_name_formater;
      if (_argname_formater)
         argname_formater=_argname_formater;

      p->_set_tag_callback(got_tag);
      p->_set_data_callback(lambda(object g,string text)
         {
            if (text!="")
            {
               // Append the text to the innermost frame whose tag is still
               // open. res has one frame more than tagstack, so the scan
               // ends at the top-level frame at the latest.
               for (int k=0;k<sizeof(res);k++)
               {
                  if (k>=sizeof(tagstack)||tagstack[k]->open)
                  {
                     res[k]+=({text});
                     break;
                  }
               }
            }
            return ({});
         });
   }

   //! @decl object feed(string s)
   //! @decl array(SGMLatom|string) finish()
   //! @decl array(SGMLatom|string) result()
   //!	Feed new data to the object, or finish the stream.
   //!	No result can be used until @[finish()] is called.
   //!
   //! 	Both @[finish()] and @[result()] return the computed data.
   //!
   //!	@[feed()] returns the called object.

   this_program feed(string s)
   {
      // Hand the chunk to the underlying parser; the callbacks installed
      // by create() build the tree as a side effect.
      p->feed(s);

      // Return the called object so that calls can be chained.
      return this_object();
   }

   array(SGMLatom|string) finish()
   {
      // res is set to 0 at the end of the first call. A repeated call
      // returns the stored result instead of failing on res[0].
      if (!res)
         return data;

      p->finish();

      // Tags still on the stack were never popped by an end tag; give each
      // one, innermost first, the content frame collected for it.
      foreach ( tagstack, SGMLatom a )
      {
         a->data+=res[0];
         res=res[1..];
      }
      tagstack=({});

      // The single remaining frame is the top-level content.
      data=res[0];
      res=0;
      return data;
   }

   // Returns the stored parse result. data is only assigned in finish(),
   // so this is 0 before finish() has been called.
   array(SGMLatom|string) result()
   {
      return data;
   }
}