roxen.lists.roxen.general

Subject Author Date
[PATCH 09/17] Add support for csv quoting Stephen R. van den Berg <srb[at]cuci[dot]nl> 20-01-2009
---

 server/etc/modules/Roxen.pmod     |   12 ++++++++
 server/modules/tags/rxmltags.pike |   59 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/server/etc/modules/Roxen.pmod b/server/etc/modules/Roxen.pmod
index 24da069..37cb3c3 100644
--- a/server/etc/modules/Roxen.pmod
+++ b/server/etc/modules/Roxen.pmod
@@ -2438,6 +2438,13 @@ protected string low_roxen_encode(string val, string
encoding)
 		    ({ "'", "\"" }),
 		    ({ "''", "\"'\"'\"" }) );
 
+   case "csv":
+     // Quote on edge blanks or embedded , ; " or newline; double inner quotes.
+     return sizeof(val)
+      &&(val[0]==' '||val[0]=='\t'||val[-1]==' '||val[-1]=='\t'
+       ||has_value(val,",")||has_value(val,";")||has_value(val,"\"")
+       ||has_value(val,"\n"))?"\""+replace(val,"\"","\"\"")+"\"":val;
+
    default:
      // Unknown encoding. Let the caller decide what to do with it.
      return 0;
@@ -2473,6 +2480,11 @@ protected string low_roxen_encode(string val, string
encoding)
 //!     @expr. Requires octet (i.e. non-wide) strings.
 //!     C.f. @[String.string2hex].
 //!
+//!   @value "csv"
+//!     CSV (Comma Separated Values) encoding, properly quotes all
+//!     separator characters in CSV records (comma, semicolon, double-quotes,
+//!     newlines, and leading or trailing spaces and tabs).
+//!
 //!   @value "base64"
 //!   @value "base-64"
 //!   @value "b64"
diff --git a/server/modules/tags/rxmltags.pike
b/server/modules/tags/rxmltags.pike
index 2969bb1..40f1303 100644
--- a/server/modules/tags/rxmltags.pike
+++ b/server/modules/tags/rxmltags.pike
@@ -7049,6 +7049,60 @@ class TagEmitValues {
 			    return word;
 			  });
 	  break;
+	case "csv":
+         { array out=({});
+           int i=0;
+	   string values=m->values;
+	   int len=sizeof(values);
+#define GETCHAR()	(i<len?values[i++]:-1)
+           array(string) words=({});
+           int c,leadspace=1,inquotes=0;
+           string word="";
+           for(c=GETCHAR();c>=0;)
+            { switch(c)
+               { case ',':case ';':
+                    if(!inquotes)
+                     { words+=({word});word="";leadspace=1;
+                       break;
+                     }
+                    word+=sprintf("%c",c);
+                    break;
+                 case '"':leadspace=0;
+                    if(!inquotes)
+                       inquotes=1;
+                    else if((c=GETCHAR())=='"')
+                       word+=sprintf("%c",c);
+                    else
+                     { inquotes=0;
+                       continue;
+                     }
+                    break;
+                 default:leadspace=0;
+                 case ' ':case '\t':
+                    if(!leadspace)
+                     { string s;
+		       sscanf(values[--i..],"%[^,;\"\r\x1a\n]",s);
+		       word+=s;
+		       i+=sizeof(s);
+		     }
+                    break;
+                 case -1:case '\r':case '\x1a':
+                    break;
+                 case '\n':
+                    if(!inquotes)
+                     { if(!sizeof(words)&&word=="")
+                          break;
+                       out+=({words+({word})});
+		       word="";words=({});
+		       break;
+                     }
+                    word+=sprintf("%c",c);
+               }
+              c=GETCHAR();
+            }
+           m->values=!sizeof(out)&&word==""?"":out+({words+({word})});
+	   break;
+         }
 	}
       }
       if(stringp(m->values)) {
@@ -11811,9 +11865,10 @@ Specify scope to test for existence.</p>
  string.</p>
 </attr>
 
-<attr name='advanced' value='lines|words|chars'><p>
+<attr name='advanced' value='lines|words|csv|chars'><p>
  If the value is a string it can be splitted into separate lines,
- words or characters by using this attribute.</p>
+ words, CSV (comma separated values) fields or characters by using this
+ attribute.</p>
 </attr>
 
 <attr name='case' value='upper|lower'><p>