bstring.erl in the next release?

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

bstring.erl in the next release?

Sean Hinde-2
Jérôme,

>   I was seeking for a binary implementation of string functions
>   and someone gracefully pointed me to the mail archive at
>  
>  http://www.erlang.org/ml-archive/erlang-questions/200103/msg00134.html
>
>  What about integrating this module in the next Erlang release? (unless
>  it is already there).

I put together this driver some time ago (finished off today) which does the
str and chr functions quite efficiently. Warning: I am not a C programmer by
trade. Please therefore treat with extreme caution. Please let me know if
you find any gremlins..

I'd also appreciate any comments anyone may have on how this looks or how it
could be improved. It could certainly do with a reverse function and perhaps
rstr, rchr if anyone felt the urge.

Of course if Ericsson made them into bifs that would be even better :)

Usage:

1> bstring:open().
true
2> bstring:chr(<<1,1,1,1,230,1,1>>, 230).
5
3> bstring:str(<<0,1,2,3,4,5,6,7,8>>, <<4,5,6>>).
5
4> bstring:str(<<0,1,2,3,4,5,6,7,8>>, <<4,5,7>>).
0

Enjoy,

Sean


---------bstring_drv.c-----------
/*
 * Created:  Sep 18 10:03:40 by shinde
 * Based on byteorder by tnt
 * Function: A little ddll driver for doing string searching.
 *
 * To compile (an example):
 *
 *   gcc -I /opt/rcs/5.0.2.5/usr/include\
 *       -fpic -shared -o bstring_drv.so bstring_drv.c
 *
 */
#include <stdio.h>
#include "erl_driver.h"

/*
 * Store the low 32 bits of i at address s, most significant byte first.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * "if (c) put_int32(i, s); else ..." compiles, which a bare { } block
 * followed by ';' does not. Each byte is masked before the cast so all four
 * lines follow the same pattern. Note that both arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define put_int32(i, s) do { \
        ((char*)(s))[0] = (char)(((i) >> 24) & 0xff); \
        ((char*)(s))[1] = (char)(((i) >> 16) & 0xff); \
        ((char*)(s))[2] = (char)(((i) >> 8)  & 0xff); \
        ((char*)(s))[3] = (char)((i)         & 0xff); \
    } while (0)

/*
 * Read four bytes at address s, most significant byte first, and return
 * them as an int.
 *
 * Each byte is widened to unsigned int before shifting: an unsigned char
 * promotes to (signed) int, and shifting a value >= 0x80 left by 24 bits
 * would overflow a signed int. The final cast keeps the expression's type
 * the same as before (int). s is evaluated more than once.
 */
#define get_int32(s) ((int) (((unsigned int) ((unsigned char*) (s))[0] << 24) | \
                             ((unsigned int) ((unsigned char*) (s))[1] << 16) | \
                             ((unsigned int) ((unsigned char*) (s))[2] << 8)  | \
                             ((unsigned int) ((unsigned char*) (s))[3])))

/* Command codes received as the `command` argument of bstring_control().
   They must stay in step with the ?CHR and ?STR macros in bstring.erl. */
#define CHR 1
#define STR 2

/* Shorthand for an unsigned byte. */
typedef unsigned char uchar;

/* Forward declarations of the driver callbacks referenced by the
   driver entry table; the definitions follow further down. */
static ErlDrvData bstring_start(ErlDrvPort port, char *buf);
static void       bstring_stop(ErlDrvData drv_data);
static int        bstring_control(ErlDrvData drv_data, unsigned int command,
                                  char *buf, int len,
                                  char **rbuf, int rlen);

/*
 * Callback table for this driver; its address is what
 * DRIVER_INIT(bstring_drv) returns. The initializer is positional, so the
 * order of the entries must not be changed. NULL marks a callback this
 * driver does not implement. The name string "bstring_drv" is the one
 * bstring.erl passes to erl_ddll:load_driver/2 and open_port/2.
 */
static ErlDrvEntry bstring_driver_entry = {
    NULL, /* init */
    bstring_start, /* start: records the port handle */
    bstring_stop, /* stop: clears the recorded port handle */
    NULL, /* output */
    NULL, /* ready_input */
    NULL, /* ready_output */
    "bstring_drv", /* driver name */
    NULL, /* finish */
    NULL, /* handle */
    bstring_control, /* control: performs the CHR and STR searches */
    NULL, /* timeout */
    NULL /* outputv */
};

/* The single port currently using this driver; (ErlDrvPort) -1 means that
   no port is open. Set by bstring_start(), reset by bstring_stop(). */
static ErlDrvPort erlang_port = (ErlDrvPort) -1;

/*
 * Initialize and return a driver entry struct
 */

DRIVER_INIT(bstring_drv)
{
  return &bstring_driver_entry;
}


/*
 * Start callback. Only one port may use the driver at a time: if a port is
 * already recorded in erlang_port the start is refused by returning
 * (ErlDrvData) -1 (the value erl_driver.h names ERL_DRV_ERROR_GENERAL).
 * Otherwise the port handle is remembered and also returned as the driver
 * data.
 *
 *   port - handle of the port being opened
 *   buf  - command string given to open_port (unused)
 */
static ErlDrvData bstring_start(ErlDrvPort port, char *buf)
{
      /* Compare against the same sentinel that initialises and resets
         erlang_port. The previous (int) cast discarded the upper half of
         the handle on platforms where it is wider than int. */
      if (erlang_port != (ErlDrvPort) -1) {
          return ((ErlDrvData) -1);
      }
      erlang_port = port;
      return ((ErlDrvData) port);

}

/*
 * Stop callback: resets erlang_port to the "no port open" sentinel so that
 * a later bstring_start() is accepted again. desc is unused.
 */
static void bstring_stop(ErlDrvData desc)
{
  erlang_port = (ErlDrvPort) -1;
}

/*
 * Return 1 if the first n bytes at buf1 and buf2 are equal, 0 at the first
 * difference. When n <= 0 nothing is compared and the result is 1.
 */
static int prefix(char *buf1, char *buf2, int n) {
  int k = 0;

  while (k < n) {
    if (buf1[k] != buf2[k])
      return 0;
    k++;
  }
  return 1;
}

/*
 * Control callback: performs the search selected by `command` on the
 * `length` bytes at buf and writes a 4-byte big-endian result into *rbuf.
 *
 *   CHR: buf = <<C, Bin/binary>>. Result is the 1-based position of the
 *        first byte of Bin equal to C, or 0 if there is none.
 *   STR: buf = <<BinLen:32, SubLen:32, Bin/binary, Sub/binary>>. Result is
 *        the 1-based position of the first occurrence of Sub in Bin, or 0
 *        if there is none or if either is empty.
 *
 * Returns 4 (the number of bytes written to *rbuf) on success. Returns -1
 * for an unknown command or a malformed buffer; a negative return from a
 * control callback is expected to make erlang:port_control/3 fail with
 * badarg. drv_data and rlen are unused; the 4-byte result is written into
 * the buffer supplied by the caller in *rbuf.
 */
static int bstring_control(ErlDrvData drv_data, unsigned int command,
                           char *buf, int length, char **rbuf, int rlen)
{
  /* Inspect the input as unsigned bytes. Plain char may be signed, and a
     signed byte >= 128 never compares equal to its unsigned value, so a
     search for e.g. 230 would always fail. */
  uchar *data = (uchar *) buf;
  int i;

  switch (command)
    {
    case CHR:
      {
        uchar c;

        /* The byte to search for must be present. */
        if (length < 1)
          return -1;
        c = data[0];
        /* data[i] is byte number i of the binary that follows c. */
        for (i = 1; i < length; i++)
          {
            if (data[i] == c) {
              put_int32(i, *rbuf);
              return 4;
            }
          }
        put_int32(0, *rbuf);
        return 4;
      }
    case STR:
      {
        int bin_len, sub_bin_len;
        uchar *bin, *sub_bin;

        /* Two 32-bit lengths precede the payload. */
        if (length < 8)
          return -1;
        bin_len = get_int32(data);
        sub_bin_len = get_int32(data + 4);
        /* Refuse headers describing more data than was actually passed;
           otherwise the search would read beyond the input buffer. The
           subtractions cannot go negative given the earlier tests. */
        if (bin_len < 0 || sub_bin_len < 0 ||
            bin_len > length - 8 ||
            sub_bin_len > length - 8 - bin_len)
          return -1;
        bin = data + 8;
        sub_bin = bin + bin_len;
        if (sub_bin_len == 0 || bin_len == 0) {
          put_int32(0, *rbuf);
          return 4;
        }
        /* Try every start offset that leaves room for the whole
           sub-binary; none exist when it is longer than the binary. */
        for (i = 0; i <= bin_len - sub_bin_len; i++)
          {
            if (bin[i] == sub_bin[0] &&
                prefix((char *) (bin + i + 1), (char *) (sub_bin + 1),
                       sub_bin_len - 1)) {
              put_int32(i + 1, *rbuf);
              return 4;
            }
          }
        put_int32(0, *rbuf);
        return 4;
      }
    default:
      /* Unknown command: previously control fell off the end of the
         function and returned an indeterminate value. */
      return -1;
    }
}

-----------bstring.erl--------------------

%%%----------------------------------------------------------------------
%%% File    : bstring.erl
%%% Author  :  <sean>
%%% Purpose :
%%% Created : 18 Sep 2001 by  <sean>
%%%----------------------------------------------------------------------

-module(bstring).
-author('sean').
-vsn('$Id: checksum.erl,v 1.2 2001/06/28 14:02:17 sean Exp $ ').

%% Operation codes passed to erlang:port_control/3. They must stay in step
%% with the CHR and STR definitions in bstring_drv.c.
-define(CHR, 1).
-define(STR, 2).

%% Combine four byte values, most significant first, into one integer.
-define(u32(X3,X2,X1,X0),
        (((X3) bsl 24) bor ((X2) bsl 16) bor ((X1) bsl 8) bor (X0))).

%%-compile(export_all).

-export([open/0,chr/2, str/2]).

%% open() -> true
%%
%% Loads the bstring_drv driver from the current directory, opens a port to
%% it and stores the port under the key `port' in the named ETS table
%% bstring_server_table, where chr/2 and str/2 look it up.
%%
%% The port and the table both belong to the calling process and go away
%% when that process exits. Calling open/0 again while the table exists
%% fails with badarg from ets:new/2.
%%
%% A failed driver load now raises {load_driver_failed, Description} instead
%% of being ignored and surfacing later as a bare badarg from open_port/2.
open() ->
    erl_ddll:start(),
    case erl_ddll:load_driver(".", "bstring_drv") of
        ok ->
            ok;
        %% Presumably only reported by older releases; the driver is usable
        %% either way, so carry on.
        {error, already_loaded} ->
            ok;
        {error, Reason} ->
            erlang:error({load_driver_failed, erl_ddll:format_error(Reason)})
    end,
    Port = open_port({spawn, "bstring_drv"}, []),
    T = ets:new(bstring_server_table, [set, protected, named_table]),
    ets:insert(T, {port, Port}).


%% chr(Bin, C) -> integer()
%%
%% Returns the 1-based position of the first byte in Bin equal to C, or 0
%% if C does not occur. open/0 must have been called first. C is sent to
%% the driver as a single byte, so it has to be in the range 0..255;
%% anything else is not valid port data and makes port_control/3 fail.
%%
%% Uses is_binary/1 and is_integer/1 in place of the obsolete guard tests
%% binary/1 and integer/1.
chr(Bin, C) when is_binary(Bin), is_integer(C) ->
    [{port, Port} | _] = ets:lookup(bstring_server_table, port),
    %% The driver answers with four bytes, most significant first.
    [X3, X2, X1, X0] = erlang:port_control(Port, ?CHR, [C, Bin]),
    ?u32(X3, X2, X1, X0).

%% str(Bin, Sub_bin) -> integer()
%%
%% Returns the 1-based position of the first occurrence of Sub_bin in Bin,
%% or 0 if it does not occur. open/0 must have been called first.
%%
%% The request sent to the driver is two 32-bit lengths followed by the two
%% binaries. Uses is_binary/1 in place of the obsolete guard test binary/1,
%% and byte_size/1 in place of the generic size/1.
str(Bin, Sub_bin) when is_binary(Bin), is_binary(Sub_bin) ->
    [{port, Port} | _] = ets:lookup(bstring_server_table, port),
    S1 = byte_size(Bin),
    S2 = byte_size(Sub_bin),
    Header = <<S1:32/integer, S2:32/integer>>,
    %% The driver answers with four bytes, most significant first.
    [X3, X2, X1, X0] =
        erlang:port_control(Port, ?STR, [Header, Bin, Sub_bin]),
    ?u32(X3, X2, X1, X0).



NOTICE AND DISCLAIMER:
This email (including attachments) is confidential.  If you have received
this email in error please notify the sender immediately and delete this
email from your system without copying or disseminating it or placing any
reliance upon its contents.  We cannot accept liability for any breaches of
confidence arising through use of email.  Any opinions expressed in this
email (including attachments) are those of the author and do not necessarily
reflect our opinions.  We will not accept responsibility for any commitments
made by our employees outside the scope of our business.  We do not warrant
the accuracy or completeness of such information.



Reply | Threaded
Open this post in threaded view
|

bstring.erl in the next release?

Ulf Wiger-4
On Tue, 18 Sep 2001, Sean Hinde wrote:

>I put together this driver sometime ago (finished off today)
>which does the str and chr functions quite efficiently.
>Warning: I am not a C programmer by trade. Please therefore
>treat with extreme caution. Please let me know if you find any
>gremlins..

One thing you might want to look at in OTP R8 is the named ports
feature. It should let you skip the part where you lookup the
port ID in an ets table.

/Uffe
--
Ulf Wiger                                    tfn: +46  8 719 81 95
Senior System Architect                      mob: +46 70 519 81 95
Strategic Product & System Management    ATM Multiservice Networks
Data Backbone & Optical Services Division      Ericsson Telecom AB



Reply | Threaded
Open this post in threaded view
|

bstring.erl in the next release?

Jérôme Marant-2
In reply to this post by Sean Hinde-2
Sean Hinde <Sean.Hinde> writes:


> I put together this driver sometime ago (finished off today) which does the
> str and chr functions quite efficiently. Warning: I am not a C programmer by
> trade. Please therefore treat with extreme caution. Please let me know if
> you find any gremlins..
>
> I'd also appreciate any comments anyone may have on how this looks or how it
> could be improved. It could certainly do with a reverse function and perhaps
> rstr, rchr if anyone felt the urge.

  Thanks. I'll give feedback.

>
> Of course if Ericsson made them into bifs that would be even better :)

  Better: implementing strings as binaries rather than lists.
  Everyone knows that managing strings with lists is the major cause of
  poor performances of some Erlang programs: one must not underestimate
  the number of programs processing strings.

  Cheers,

--
Jérôme Marant <jerome.marant>
              <jerome>