Deutsch   English   Français   Italiano  
<v8s69j$1c0n2$1@dont-email.me>

View for Bookmarking (what is this?)
Look up another Usenet article

Path: ...!eternal-september.org!feeder3.eternal-september.org!news.eternal-september.org!.POSTED!not-for-mail
From: "B. Pym" <Nobody447095@here-nor-there.org>
Newsgroups: comp.lang.lisp
Subject: Re: Weird problem
Date: Tue, 6 Aug 2024 03:46:09 -0000 (UTC)
Organization: A noiseless patient Spider
Lines: 130
Message-ID: <v8s69j$1c0n2$1@dont-email.me>
MIME-Version: 1.0
Content-Type: text/plain; charset=iso-8859-1
Injection-Date: Tue, 06 Aug 2024 05:46:10 +0200 (CEST)
Injection-Info: dont-email.me; posting-host="3b22668de36180485571eeb8efed0a5f";
	logging-data="1442530"; mail-complaints-to="abuse@eternal-september.org";	posting-account="U2FsdGVkX1/2i+t6oq60EsPwAEliuFqM"
User-Agent: XanaNews/1.18.1.6
Cancel-Lock: sha1:7z9aPW/cPvMY/t1Pwi0YfbVBOLA=
Bytes: 5723

Pierre Mai wrote:

> Here is a simple solution, which assumes that only one way exists to
> split a word, and that a simple heuristic suffices to disambiguate
> between possible matches at a given point (implemented are first-match
> and longest-first-match, via the operations SIMPLE-FIND-PART and
> GREEDY-FIND-PART).
> 
> If this isn't sufficient, you can either try to change the code below
> to support some form of backtracking/non-determinism, or you can check
> out Screamer, which is an extension to Common Lisp for non-determinstic
> programming, which makes this task much easier IMHO.  Screamer is by
> Jeffrey Mark Siskind (http://www.neci.nj.nec.com/homepages/qobi).
> 
> I'd recommend using Screamer, since I'd imagine you will want to
> process your word fragments further, and most things NLP will imply
> some non-determinism.
> 
> Regs, Pierre.
> 
> ;;; Utility function
> 
> (defun starts-with (string start-string &key (start 0))
>   (let ((start-length (+ start (length start-string))))
>     (and (>= (length string) start-length)
>          (string-equal string start-string :start1 start
>                        :end1 start-length))))
> 
> ;;; The different part-finders, which implement first-match and
> ;;; longest-first-match heuristics respectively.
> 
> (defun simple-find-part (string part-list &key (start 0))
>   (dolist (part part-list)
>     (when (starts-with string part :start start)
>       (return part))))
> 
> (defun greedy-find-part (string part-list &key (start 0))
>   (loop with result = nil
>         with length = 0
>         for part in part-list
>         do
>         (when (and (starts-with string part :start start)
>                    (> (length part) length))
>           (setq result part
>                 length (length part)))
>         finally
>         (return result)))
> 
> ;;; The main function.
> 
> (defun break-apart (word part-finder &rest part-lists)
>   (loop with word-length = (length word)
>         for index = 0 then (+ index (length part))
>         for part-list in part-lists
>         for part = (funcall part-finder word part-list :start index)
>         never (or (not part) (>= index word-length))
>         collect part into result
>         finally
>         (return (and (= index word-length)
>                      result))))
> 
> ;;; Examples
> 
> #|
> * (break-apart "astronaut" #'simple-find-part
>              '("as" "co" "ast") '("tro" "ro" "mp")
>              '("na" "ut") '("ut" "er"))
> ("as" "tro" "na" "ut")
> * (break-apart "astronaut" #'greedy-find-part
>              '("as" "co" "ast") '("tro" "ro" "mp")
>              '("na" "ut") '("ut" "er"))
> ("ast" "ro" "na" "ut")
> * (break-apart "astronaut" #'simple-find-part
>              '("as" "co" "ast") '("tro" "mp")
>              '("na" "ut") '("ut" "er"))
> ("as" "tro" "na" "ut")
> * (break-apart "astronaut" #'greedy-find-part
>              '("as" "co" "ast") '("tro" "mp")
>              '("na" "ut") '("ut" "er"))
> NIL

newLISP

(define (cartesian-product lists)
  (if (null? lists)
    '(())
    (let (subproduct (cartesian-product (rest lists)))
      (apply append
        (map
          (fn (x) (map (fn (xs) (cons x xs)) subproduct))
          (first lists))))))

(define (good? xs) (= (apply string xs) "magnetohydrodynamics"))

(filter good?
  (cartesian-product
    '(("mag" "ma" "ho" "magn" "in")
      ("eto" "net" "et")
      ("ohy" "o" "od" "oh")
      ("hy" "hyd" "ma" "hi")
      ("od" "drod" "rod")
      ("y" "yj" "yn" "yna")
      ("m" "am" "nam" "nami")
      ("ic" "is" "i")
      ("s" "cs"))))

(("mag" "net" "o" "hy" "drod" "y" "nam" "ic" "s")
 ("mag" "net" "o" "hy" "drod" "y" "nam" "i" "cs")
 ("mag" "net" "o" "hy" "drod" "yn" "am" "ic" "s")
 ("mag" "net" "o" "hy" "drod" "yn" "am" "i" "cs")
 ("mag" "net" "o" "hy" "drod" "yna" "m" "ic" "s")
 ("mag" "net" "o" "hy" "drod" "yna" "m" "i" "cs")
 ("mag" "net" "o" "hyd" "rod" "y" "nam" "ic" "s")
 ("mag" "net" "o" "hyd" "rod" "y" "nam" "i" "cs")
 ("mag" "net" "o" "hyd" "rod" "yn" "am" "ic" "s")
 ("mag" "net" "o" "hyd" "rod" "yn" "am" "i" "cs")
 ("mag" "net" "o" "hyd" "rod" "yna" "m" "ic" "s")
 ("mag" "net" "o" "hyd" "rod" "yna" "m" "i" "cs")
 ("magn" "et" "o" "hy" "drod" "y" "nam" "ic" "s")
 ("magn" "et" "o" "hy" "drod" "y" "nam" "i" "cs")
 ("magn" "et" "o" "hy" "drod" "yn" "am" "ic" "s")
 ("magn" "et" "o" "hy" "drod" "yn" "am" "i" "cs")
 ("magn" "et" "o" "hy" "drod" "yna" "m" "ic" "s")
 ("magn" "et" "o" "hy" "drod" "yna" "m" "i" "cs")
 ("magn" "et" "o" "hyd" "rod" "y" "nam" "ic" "s")
 ("magn" "et" "o" "hyd" "rod" "y" "nam" "i" "cs")
 ("magn" "et" "o" "hyd" "rod" "yn" "am" "ic" "s")
 ("magn" "et" "o" "hyd" "rod" "yn" "am" "i" "cs")
 ("magn" "et" "o" "hyd" "rod" "yna" "m" "ic" "s")
 ("magn" "et" "o" "hyd" "rod" "yna" "m" "i" "cs"))