Deutsch English Français Italiano |
<v8s69j$1c0n2$1@dont-email.me> View for Bookmarking (what is this?) Look up another Usenet article |
Path: ...!eternal-september.org!feeder3.eternal-september.org!news.eternal-september.org!.POSTED!not-for-mail From: "B. Pym" <Nobody447095@here-nor-there.org> Newsgroups: comp.lang.lisp Subject: Re: Weird problem Date: Tue, 6 Aug 2024 03:46:09 -0000 (UTC) Organization: A noiseless patient Spider Lines: 130 Message-ID: <v8s69j$1c0n2$1@dont-email.me> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Injection-Date: Tue, 06 Aug 2024 05:46:10 +0200 (CEST) Injection-Info: dont-email.me; posting-host="3b22668de36180485571eeb8efed0a5f"; logging-data="1442530"; mail-complaints-to="abuse@eternal-september.org"; posting-account="U2FsdGVkX1/2i+t6oq60EsPwAEliuFqM" User-Agent: XanaNews/1.18.1.6 Cancel-Lock: sha1:7z9aPW/cPvMY/t1Pwi0YfbVBOLA= Bytes: 5723 Pierre Mai wrote: > Here is a simple solution, which assumes that only one way exists to > split a word, and that a simple heuristic suffices to disambiguate > between possible matches at a given point (implemented are first-match > and longest-first-match, via the operations SIMPLE-FIND-PART and > GREEDY-FIND-PART). > > If this isn't sufficient, you can either try to change the code below > to support some form of backtracking/non-determinism, or you can check > out Screamer, which is an extension to Common Lisp for non-determinstic > programming, which makes this task much easier IMHO. Screamer is by > Jeffrey Mark Siskind (http://www.neci.nj.nec.com/homepages/qobi). > > I'd recommend using Screamer, since I'd imagine you will want to > process your word fragments further, and most things NLP will imply > some non-determinism. > > Regs, Pierre. > > ;;; Utility function > > (defun starts-with (string start-string &key (start 0)) > (let ((start-length (+ start (length start-string)))) > (and (>= (length string) start-length) > (string-equal string start-string :start1 start > :end1 start-length)))) > > ;;; The different part-finders, which implement first-match and > ;;; longest-first-match heuristics respectively. > > (defun simple-find-part (string part-list &key (start 0)) > (dolist (part part-list) > (when (starts-with string part :start start) > (return part)))) > > (defun greedy-find-part (string part-list &key (start 0)) > (loop with result = nil > with length = 0 > for part in part-list > do > (when (and (starts-with string part :start start) > (> (length part) length)) > (setq result part > length (length part))) > finally > (return result))) > > ;;; The main function. > > (defun break-apart (word part-finder &rest part-lists) > (loop with word-length = (length word) > for index = 0 then (+ index (length part)) > for part-list in part-lists > for part = (funcall part-finder word part-list :start index) > never (or (not part) (>= index word-length)) > collect part into result > finally > (return (and (= index word-length) > result)))) > > ;;; Examples > > #| > * (break-apart "astronaut" #'simple-find-part > '("as" "co" "ast") '("tro" "ro" "mp") > '("na" "ut") '("ut" "er")) > ("as" "tro" "na" "ut") > * (break-apart "astronaut" #'greedy-find-part > '("as" "co" "ast") '("tro" "ro" "mp") > '("na" "ut") '("ut" "er")) > ("ast" "ro" "na" "ut") > * (break-apart "astronaut" #'simple-find-part > '("as" "co" "ast") '("tro" "mp") > '("na" "ut") '("ut" "er")) > ("as" "tro" "na" "ut") > * (break-apart "astronaut" #'greedy-find-part > '("as" "co" "ast") '("tro" "mp") > '("na" "ut") '("ut" "er")) > NIL newLISP (define (cartesian-product lists) (if (null? lists) '(()) (let (subproduct (cartesian-product (rest lists))) (apply append (map (fn (x) (map (fn (xs) (cons x xs)) subproduct)) (first lists)))))) (define (good? xs) (= (apply string xs) "magnetohydrodynamics")) (filter good? (cartesian-product '(("mag" "ma" "ho" "magn" "in") ("eto" "net" "et") ("ohy" "o" "od" "oh") ("hy" "hyd" "ma" "hi") ("od" "drod" "rod") ("y" "yj" "yn" "yna") ("m" "am" "nam" "nami") ("ic" "is" "i") ("s" "cs")))) (("mag" "net" "o" "hy" "drod" "y" "nam" "ic" "s") ("mag" "net" "o" "hy" "drod" "y" "nam" "i" "cs") ("mag" "net" "o" "hy" "drod" "yn" "am" "ic" "s") ("mag" "net" "o" "hy" "drod" "yn" "am" "i" "cs") ("mag" "net" "o" "hy" "drod" "yna" "m" "ic" "s") ("mag" "net" "o" "hy" "drod" "yna" "m" "i" "cs") ("mag" "net" "o" "hyd" "rod" "y" "nam" "ic" "s") ("mag" "net" "o" "hyd" "rod" "y" "nam" "i" "cs") ("mag" "net" "o" "hyd" "rod" "yn" "am" "ic" "s") ("mag" "net" "o" "hyd" "rod" "yn" "am" "i" "cs") ("mag" "net" "o" "hyd" "rod" "yna" "m" "ic" "s") ("mag" "net" "o" "hyd" "rod" "yna" "m" "i" "cs") ("magn" "et" "o" "hy" "drod" "y" "nam" "ic" "s") ("magn" "et" "o" "hy" "drod" "y" "nam" "i" "cs") ("magn" "et" "o" "hy" "drod" "yn" "am" "ic" "s") ("magn" "et" "o" "hy" "drod" "yn" "am" "i" "cs") ("magn" "et" "o" "hy" "drod" "yna" "m" "ic" "s") ("magn" "et" "o" "hy" "drod" "yna" "m" "i" "cs") ("magn" "et" "o" "hyd" "rod" "y" "nam" "ic" "s") ("magn" "et" "o" "hyd" "rod" "y" "nam" "i" "cs") ("magn" "et" "o" "hyd" "rod" "yn" "am" "ic" "s") ("magn" "et" "o" "hyd" "rod" "yn" "am" "i" "cs") ("magn" "et" "o" "hyd" "rod" "yna" "m" "ic" "s") ("magn" "et" "o" "hyd" "rod" "yna" "m" "i" "cs"))