Difference between Matlab and Octave using regexp with 'once'

View: New views
5 Messages — Rating Filter:   Alert me  

Difference between Matlab and Octave using regexp with 'once'

by Neitzel Karl-Ernst SGD EAZE * :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

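Difference between Matlab and Octave using regexp with 'once': Matlab returns the match as a plain string, while Octave still returns it wrapped in a cell array.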

MATLAB 7.5.0.342 (R2007b):

>> regexp('This is an example','^\S*','match')

ans =

    'This'

>> regexp('This is an example','^\S*','match','once')

ans =

This



OCTAVE 3.0.1:

>> regexp('This is an example','^\S*','match')
ans =

{
  [1,1] = This
}

>> regexp('This is an example','^\S*','match','once')
ans =

{
  [1,1] = This
}



----------------
Vorsitzender des Aufsichtsrats: Hans-Georg Härter
Geschäftsführung: Michael Hankel (Vorsitz), Bertram Hoffmann, Wolfgang Runge, Karl-Heinz Schrödl, Wolfgang Zeitz
Sitz: Schwäbisch Gmünd - Handelsregistereintrag: Amtsgericht Ulm HRB 701678

Achtung: Die Werke der ZFLS in Schwäbisch Gmünd liegen seit 1. März 2008 in einer Umweltzone
http://www.zf-lenksysteme.com/index.php?bwp=m:9-n:20-s:-o:-l:1-p:1-na:2



_______________________________________________
Bug-octave mailing list
Bug-octave@...
https://www.cae.wisc.edu/mailman/listinfo/bug-octave

Re: Difference between Matlab and Octave using regexp with 'once'

by Jaroslav Hajek-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

The attached patch makes Octave's regexp(...,'once') behaviour Matlab compatible.

2008/6/18 Neitzel Karl-Ernst SGD EAZE * <Karl-Ernst.Neitzel@...>:

> MATLAB 7.5.0.342 (R2007b):
>
>>> regexp('This is an example','^\S*','match')
>
> ans =
>
>     'This'
>
>>> regexp('This is an example','^\S*','match','once')
>
> ans =
>
> This
>
> OCTAVE 3.0.1:
>
>>> regexp('This is an example','^\S*','match')
> ans =
>
> {
>   [1,1] = This
> }
>
>>> regexp('This is an example','^\S*','match','once')
> ans =
>
> {
>   [1,1] = This
> }
>
> ----------------
> Vorsitzender des Aufsichtsrats: Hans-Georg Härter
> Geschäftsführung: Michael Hankel (Vorsitz), Bertram Hoffmann, Wolfgang
> Runge, Karl-Heinz Schrödl, Wolfgang Zeitz
> Sitz: Schwäbisch Gmünd - Handelsregistereintrag: Amtsgericht Ulm HRB 701678
>
> Achtung: Die Werke der ZFLS in Schwäbisch Gmünd liegen seit 1. März 2008 in
> einer Umweltzone
> http://www.zf-lenksysteme.com/index.php?bwp=m:9-n:20-s:-o:-l:1-p:1-na:2
>
>
>
> _______________________________________________
> Bug-octave mailing list
> Bug-octave@...
> https://www.cae.wisc.edu/mailman/listinfo/bug-octave
>
>


--
RNDr. Jaroslav Hajek
computing expert
Aeronautical Research and Test Institute (VZLU)
Prague, Czech Republic
url: www.highegg.matfyz.cz

[regexp-fix.diff]

# HG changeset patch
# User Jaroslav Hajek <highegg@...>
# Date 1213815606 -7200
# Node ID a1ce5797afbd156d3dd8bb006984e5e98fdfe547
# Parent  0280a546622c42bf60fb8a2839297691996359dd
make regexp(...,'once') matlab compatible

diff --git a/src/DLD-FUNCTIONS/regexp.cc b/src/DLD-FUNCTIONS/regexp.cc
--- a/src/DLD-FUNCTIONS/regexp.cc
+++ b/src/DLD-FUNCTIONS/regexp.cc
@@ -83,17 +83,17 @@ static int
 static int
 octregexp_list (const octave_value_list &args, const std::string &nm,
  bool case_insensitive, std::list<regexp_elem> &lst,
- string_vector &named, int &nopts)
+ string_vector &named, int &nopts, bool &once)
 {
   int sz = 0;
 #if defined (HAVE_REGEX) || defined (HAVE_PCRE)
   int nargin = args.length();
-  bool once = false;
   bool lineanchors = false;
   bool dotexceptnewline = false;
   bool freespacing = false;
 
   nopts = nargin - 2;
+  once = false;
 
   std::string buffer = args(0).string_value ();
   if (error_state)
@@ -451,7 +451,8 @@ octregexp (const octave_value_list &args
   std::list<regexp_elem> lst;
   string_vector named;
   int nopts;
-  int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts);
+  bool once;
+  int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once);
 
   if (! error_state)
     {
@@ -482,36 +483,70 @@ octregexp (const octave_value_list &args
       retval(5) = Octave_map();
 #endif
 
-      Cell t (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- t(i++) = p->t;
-      retval(4) = t;
+      if (once)
+        retval(4) = sz ? lst.front ().t : Cell();
+      else
+        {
+          Cell t (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            t(i++) = p->t;
+          retval(4) = t;
+        }
 
-      Cell m (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- m(i++) = p->m;
-      retval(3) = m;
+      if (once)
+        retval(3) = sz ? lst.front ().m : std::string();
+      else
+        {
+          Cell m (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            m(i++) = p->m;
+          retval(3) = m;
+        }
 
+      if (once)
+        retval(2) = sz ? lst.front ().te : Matrix();
+      else
+        {
+          Cell te (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            te(i++) = p->te;
+          retval(2) = te;
+        }
 
-      Cell te (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- te(i++) = p->te;
-      retval(2) = te;
+      if (once)
+        {
+          if (sz)
+            retval(1) = lst.front ().e;
+          else
+            retval(1) = Matrix();
+        }
+      else
+        {
+          NDArray e (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            e(i++) = p->e;
+          retval(1) = e;
+        }
 
-      NDArray e (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- e(i++) = p->e;
-      retval(1) = e;
-
+      if (once)
+        {
+          if (sz)
+            retval(0) = lst.front ().s;
+          else
+            retval(0) = Matrix();
+        }
+      else
+        {
       NDArray s (dim_vector(1, sz));
       i = 0;
       for (const_iterator p = lst.begin(); p != lst.end(); p++)
  s(i++) = p->s;
       retval(0) = s;
+        }
 
       // Alter the order of the output arguments
       if (nopts > 0)
@@ -911,21 +946,17 @@ The pattern is taken literally.\n\
 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'short')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'short')
+%! assert (isempty(t))
 
 %!test
 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'short')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'short')
+%! assert (isempty(t))
 
 %!testif HAVE_PCRE
 %! ## This test is expected to fail if PCRE is not installed
@@ -1087,21 +1118,17 @@ if there are none. See @code{regexp} for
 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'ShoRt')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'ShoRt')
+%! assert (isempty(t))
 
 %!test
 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'ShoRt')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'ShoRt')
+%! assert (isempty(t))
 
 %!testif HAVE_PCRE
 %! ## This test is expected to fail if PCRE is not installed
@@ -1237,7 +1264,8 @@ octregexprep (const octave_value_list &a
       std::list<regexp_elem> lst;
       string_vector named;
       int nopts;
-      int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts);
+      bool once;
+      int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once);
 
       if (error_state)
  return retval;
@@ -1323,7 +1351,8 @@ octregexprep (const octave_value_list &a
       std::list<regexp_elem> lst;
       string_vector named;
       int nopts;
-      int sz = octregexp_list (regexpargs, nm, false, lst, named,nopts);
+      bool once;
+      int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once);
 
       if (error_state)
  return retval;


_______________________________________________
Bug-octave mailing list
Bug-octave@...
https://www.cae.wisc.edu/mailman/listinfo/bug-octave

Re: Difference between Matlab and Octave using regexp with 'once'

by John W. Eaton :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On 18-Jun-2008, Jaroslav Hajek wrote:

| The attached patch fixes Octave's behaviour to be Matlab compatible.

Will you also please send a ChangeLog entry for this change?

Thanks,

jwe
_______________________________________________
Bug-octave mailing list
Bug-octave@...
https://www.cae.wisc.edu/mailman/listinfo/bug-octave

Re: Difference between Matlab and Octave using regexp with 'once'

by Jaroslav Hajek-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Oops, sorry. Here it is, refreshed.

On Wed, Jun 18, 2008 at 9:59 PM, John W. Eaton <jwe@...> wrote:

> On 18-Jun-2008, Jaroslav Hajek wrote:
>
> | The attached patch fixes Octave's behaviour to be Matlab compatible.
>
> Will you also please send a ChangeLog entry for this change?
>
> Thanks,
>
> jwe
>


--
RNDr. Jaroslav Hajek
computing expert
Aeronautical Research and Test Institute (VZLU)
Prague, Czech Republic
url: www.highegg.matfyz.cz

[regexp-fix.diff]

# HG changeset patch
# User Jaroslav Hajek <highegg@...>
# Date 1213815606 -7200
# Node ID 54157a9c9e35827c158625baec6e25f3ba4eb794
# Parent  76142609e8d2002346cf715a15295e98a789a55c
make regexp(...,'once') matlab compatible

diff --git a/src/ChangeLog b/src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,10 @@
+2008-06-19  Jaroslav Hajek <highegg@...>
+
+ * DLD-FUNCTIONS/regexp.cc (octregexp_list): Make "once" an output
+ argument.
+ (octregexp): Do not use cell arrays when "once" is requested.
+
+
 2008-06-12  David Bateman  <dbateman@...>
 
  * DLD-FUNCTIONS/qr.cc (Fqrupdate, Fqrinsert, Fqrshift, Fqrdelete):
diff --git a/src/DLD-FUNCTIONS/regexp.cc b/src/DLD-FUNCTIONS/regexp.cc
--- a/src/DLD-FUNCTIONS/regexp.cc
+++ b/src/DLD-FUNCTIONS/regexp.cc
@@ -83,17 +83,17 @@
 static int
 octregexp_list (const octave_value_list &args, const std::string &nm,
  bool case_insensitive, std::list<regexp_elem> &lst,
- string_vector &named, int &nopts)
+ string_vector &named, int &nopts, bool &once)
 {
   int sz = 0;
 #if defined (HAVE_REGEX) || defined (HAVE_PCRE)
   int nargin = args.length();
-  bool once = false;
   bool lineanchors = false;
   bool dotexceptnewline = false;
   bool freespacing = false;
 
   nopts = nargin - 2;
+  once = false;
 
   std::string buffer = args(0).string_value ();
   if (error_state)
@@ -451,7 +451,8 @@
   std::list<regexp_elem> lst;
   string_vector named;
   int nopts;
-  int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts);
+  bool once;
+  int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once);
 
   if (! error_state)
     {
@@ -482,36 +483,70 @@
       retval(5) = Octave_map();
 #endif
 
-      Cell t (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- t(i++) = p->t;
-      retval(4) = t;
+      if (once)
+        retval(4) = sz ? lst.front ().t : Cell();
+      else
+        {
+          Cell t (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            t(i++) = p->t;
+          retval(4) = t;
+        }
 
-      Cell m (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- m(i++) = p->m;
-      retval(3) = m;
+      if (once)
+        retval(3) = sz ? lst.front ().m : std::string();
+      else
+        {
+          Cell m (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            m(i++) = p->m;
+          retval(3) = m;
+        }
 
+      if (once)
+        retval(2) = sz ? lst.front ().te : Matrix();
+      else
+        {
+          Cell te (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            te(i++) = p->te;
+          retval(2) = te;
+        }
 
-      Cell te (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- te(i++) = p->te;
-      retval(2) = te;
+      if (once)
+        {
+          if (sz)
+            retval(1) = lst.front ().e;
+          else
+            retval(1) = Matrix();
+        }
+      else
+        {
+          NDArray e (dim_vector(1, sz));
+          i = 0;
+          for (const_iterator p = lst.begin(); p != lst.end(); p++)
+            e(i++) = p->e;
+          retval(1) = e;
+        }
 
-      NDArray e (dim_vector(1, sz));
-      i = 0;
-      for (const_iterator p = lst.begin(); p != lst.end(); p++)
- e(i++) = p->e;
-      retval(1) = e;
-
+      if (once)
+        {
+          if (sz)
+            retval(0) = lst.front ().s;
+          else
+            retval(0) = Matrix();
+        }
+      else
+        {
       NDArray s (dim_vector(1, sz));
       i = 0;
       for (const_iterator p = lst.begin(); p != lst.end(); p++)
  s(i++) = p->s;
       retval(0) = s;
+        }
 
       // Alter the order of the output arguments
       if (nopts > 0)
@@ -911,21 +946,17 @@
 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'short')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'short')
+%! assert (isempty(t))
 
 %!test
 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'short')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'short')
+%! assert (isempty(t))
 
 %!testif HAVE_PCRE
 %! ## This test is expected to fail if PCRE is not installed
@@ -1087,21 +1118,17 @@
 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'ShoRt')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'ShoRt')
+%! assert (isempty(t))
 
 %!test
 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
 %! assert (s,1)
 %! assert (e,5)
-%! assert (size(te), [1,1])
-%! assert (isempty(te{1}))
-%! assert (m{1},'ShoRt')
-%! ## Matlab gives [1,0] here but that seems wrong.
-%! assert (size(t), [1,1])
+%! assert (isempty(te))
+%! assert (m,'ShoRt')
+%! assert (isempty(t))
 
 %!testif HAVE_PCRE
 %! ## This test is expected to fail if PCRE is not installed
@@ -1237,7 +1264,8 @@
       std::list<regexp_elem> lst;
       string_vector named;
       int nopts;
-      int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts);
+      bool once;
+      int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once);
 
       if (error_state)
  return retval;
@@ -1323,7 +1351,8 @@
       std::list<regexp_elem> lst;
       string_vector named;
       int nopts;
-      int sz = octregexp_list (regexpargs, nm, false, lst, named,nopts);
+      bool once;
+      int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once);
 
       if (error_state)
  return retval;


_______________________________________________
Bug-octave mailing list
Bug-octave@...
https://www.cae.wisc.edu/mailman/listinfo/bug-octave

Re: Difference between Matlab and Octave using regexp with 'once'

by John W. Eaton :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On 19-Jun-2008, Jaroslav Hajek wrote:

| Oops, sorry. Here it is, refreshed.
|
| On Wed, Jun 18, 2008 at 9:59 PM, John W. Eaton <jwe@...> wrote:
| > On 18-Jun-2008, Jaroslav Hajek wrote:
| >
| > | The attached patch fixes Octave's behaviour to be Matlab compatible.
| >
| > Will you also please send a ChangeLog entry for this change?

I applied the changeset.

Thanks,

jwe
_______________________________________________
Bug-octave mailing list
Bug-octave@...
https://www.cae.wisc.edu/mailman/listinfo/bug-octave
LightInTheBox - Buy quality products at wholesale price