Deutsch   English   Français   Italiano  
<861q1nfsjz.fsf@linuxsc.com>

View for Bookmarking (what is this?)
Look up another Usenet article

Path: ...!news.nobody.at!eternal-september.org!feeder3.eternal-september.org!news.eternal-september.org!.POSTED!not-for-mail
From: Tim Rentsch <tr.17687@z991.linuxsc.com>
Newsgroups: comp.lang.c
Subject: Re: Command line globber/tokenizer library for C?
Date: Fri, 13 Sep 2024 09:05:04 -0700
Organization: A noiseless patient Spider
Lines: 67
Message-ID: <861q1nfsjz.fsf@linuxsc.com>
References: <lkbjchFebk9U1@mid.individual.net> <vbs1om$3jkch$1@raubtier-asyl.eternal-september.org> <vbsb94$1rsji$1@news.xmission.com> <vbsmlb$3o6n2$1@raubtier-asyl.eternal-september.org> <vbsu1d$3p7pp$1@dont-email.me> <vbtj88$1kpm$1@raubtier-asyl.eternal-september.org> <vbujak$733i$3@dont-email.me> <vbum9i$8h2o$1@dont-email.me> <vbur72$99cr$1@dont-email.me> <20240912181625.00006e68@yahoo.com> <vbv4ra$b0hv$2@dont-email.me> <vbv6r1$bhc9$1@raubtier-asyl.eternal-september.org> <20240912223828.00005c10@yahoo.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Injection-Date: Fri, 13 Sep 2024 18:05:05 +0200 (CEST)
Injection-Info: dont-email.me; posting-host="507a3b8b7a0fb0f1cc287e0540bbfcb5";
	logging-data="999308"; mail-complaints-to="abuse@eternal-september.org";	posting-account="U2FsdGVkX1/AqSNpq1tuuD6tjXUj+tItpQ61hK1bdTA="
User-Agent: Gnus/5.11 (Gnus v5.11) Emacs/22.4 (gnu/linux)
Cancel-Lock: sha1:jAnKn909+M24Sz0i9wLGA6YTUgk=
	sha1:piP78p9qlzzAhPWiP08Ql+UGcwo=
Bytes: 3208

Michael S <already5chosen@yahoo.com> writes:

[..iterate over words in a string..]

> #include <stddef.h>
>
> void parse(const char* src,
>   void (*OnToken)(const char* beg, size_t len, void* context),
>   void* context) {
>   char c0 = ' ', c1 = '\t';
>   const char* beg = 0;
>   for (;;src++) {
>     char c = *src;
>     if (c == c0 || c == c1 || c == 0) {
>       if (beg) {
>         OnToken(beg, src-beg, context);
>         c0 = ' ', c1 = '\t';
>         beg = 0;
>       }
>       if (c == 0)
>         break;
>     } else if (!beg) {
>       beg = src;
>       if (c == '"') {
>         c0 = c1 = c;
>         ++beg;
>       }
>     }
>   }
> }

I couldn't resist writing some code along similar lines.  The
entry point is words_do(), which returns one on success and
zero if the end of string is reached inside double quotes.


/*
 * Gopher is a pointer to a struct gopher_s, a callback object with a single
 * member:
 *
 *   f  - function that receives the Gopher itself plus two character
 *        pointers and returns nothing.  In this file it is invoked as
 *        go->f( go, begin, end ), where begin points at the first character
 *        of a word and end points just past its last character.
 *
 * NOTE(review): since the Gopher is handed back to f as its first argument,
 * presumably a caller embeds struct gopher_s in a larger structure to carry
 * its own state -- confirm against the intended callers.
 */
typedef struct gopher_s *Gopher;
struct gopher_s { void (*f)( Gopher, const char *, const char * ); };

/*
 * Forward declarations of the file-local (static) helpers, so that
 * words_do() can call them before their definitions appear further down.
 */
/* Arguments: scan position, start of word, outside-quotes flag, callback. */
static  _Bool   collect_word( const char *, const char *, _Bool, Gopher );
/* True for the word separators (space and tab). */
static  _Bool   is_space( char );


/*
 * Entry point: break the NUL-terminated string s into words and report each
 * one through the callback object go.
 *
 * Words are separated by the characters is_space() accepts; the scanning and
 * reporting of each word, including double-quote handling, is delegated to
 * collect_word().
 *
 * Returns 1 when the end of the string is reached outside double quotes
 * (this includes an empty or all-blank string, for which the callback is
 * never invoked), and otherwise whatever collect_word() reports, i.e. 0 when
 * the string ends inside double quotes.
 */
_Bool
words_do( const char *s, Gopher go ){
    /*
     * Skip leading separators with a loop instead of one recursive call per
     * character: C does not promise tail-call elimination, so the recursive
     * form could use a stack frame for every blank in a long run of them.
     */
    while(  is_space( *s )  ){
        s++;
    }

    if(  *s == 0  ){
        return  1;              /* nothing but separators were left */
    }

    /* s is the first character of a word; scanning starts outside quotes. */
    return  collect_word( s, s, 1, go );
}

/*
 * Scan one word and report it, then continue with the rest of the string.
 *
 *   s   - current scan position inside the word
 *   r   - first character of the word (passed unchanged to the callback)
 *   w   - 1 while the scan is outside double quotes, 0 while inside them
 *   go  - callback object; go->f( go, r, end ) is called with end pointing
 *         just past the last character of the word
 *
 * Every '"' flips w, so separators between a pair of quotes do not end the
 * word.  The quote characters are not stripped: they lie inside the reported
 * [r, end) range.
 *
 * Returns w if the string ends with this word (so 0 means the terminating
 * NUL was met inside double quotes; the partial word is still reported),
 * otherwise the result of words_do() on the remainder of the string.
 */
_Bool
collect_word( const char *s, const char *r, _Bool w, Gopher go ){
    /*
     * Walk to the end of the word with a loop rather than recursing once per
     * character: C does not promise tail-call elimination, so the recursive
     * form could use a stack frame for every character of the word.
     * The word ends at the NUL, or at a separator seen outside quotes.
     */
    for(  ;  *s != 0  &&  !( w && is_space( *s ) );  s++  ){
        if(  *s == '"'  ){
            w = !w;             /* entering or leaving a quoted section */
        }
    }

    go->f( go, r, s );

    if(  *s == 0  ){
        return  w;              /* end of input: success only if outside quotes */
    }

    /* Stopped on a separator: let words_do() find the next word, if any. */
    return  words_do( s, go );
}

/*
 * Report whether c is a word separator.
 *
 * Only the space and horizontal-tab characters count as separators; every
 * other character, including newline and the terminating NUL, does not.
 * Returns 1 for a separator and 0 otherwise.
 */
_Bool
is_space( char c ){
    switch(  c  ){
      case ' ':
      case '\t':
        return  1;
      default:
        return  0;
    }
}