706 lines
33 KiB
Common Lisp
706 lines
33 KiB
Common Lisp
;;; swank-fuzzy.lisp --- fuzzy symbol completion
|
||
;;
|
||
;; Authors: Brian Downing <bdowning@lavos.net>
|
||
;; Tobias C. Rittweiler <tcr@freebits.de>
|
||
;; and others
|
||
;;
|
||
;; License: Public Domain
|
||
;;
|
||
|
||
|
||
(in-package :swank)
|
||
|
||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||
(swank-require :swank-util)
|
||
(swank-require :swank-c-p-c))
|
||
|
||
(defvar *fuzzy-duplicate-symbol-filter* :nearest-package
|
||
"Specifies how fuzzy-matching handles \"duplicate\" symbols.
|
||
Possible values are :NEAREST-PACKAGE, :HOME-PACKAGE, :ALL, or a custom
|
||
function. See Fuzzy Completion in the manual for details.")
|
||
|
||
(export '*fuzzy-duplicate-symbol-filter*)
|
||
|
||
;;; For nomenclature of the fuzzy completion section, please read
|
||
;;; through the following docstring.
|
||
|
||
(defslimefun fuzzy-completions (string default-package-name
|
||
&key limit time-limit-in-msec)
|
||
"Returns a list of two values:
|
||
|
||
An (optionally limited to LIMIT best results) list of fuzzy
|
||
completions for a symbol designator STRING. The list will be
|
||
sorted by score, most likely match first.
|
||
|
||
A flag that indicates whether or not TIME-LIMIT-IN-MSEC has
|
||
been exhausted during computation. If that parameter's value is
|
||
NIL or 0, no time limit is assumed.
|
||
|
||
The main result is a list of completion objects, where a completion
|
||
object is:
|
||
|
||
(COMPLETED-STRING SCORE (&rest CHUNKS) CLASSIFICATION-STRING)
|
||
|
||
where a CHUNK is a description of a matched substring:
|
||
|
||
(OFFSET SUBSTRING)
|
||
|
||
and FLAGS is short string describing properties of the symbol (see
|
||
SYMBOL-CLASSIFICATION-STRING).
|
||
|
||
E.g., completing \"mvb\" in a package that uses COMMON-LISP would
|
||
return something like:
|
||
|
||
((\"multiple-value-bind\" 26.588236 ((0 \"m\") (9 \"v\") (15 \"b\"))
|
||
(:FBOUNDP :MACRO))
|
||
...)
|
||
|
||
If STRING is package qualified the result list will also be
|
||
qualified. If string is non-qualified the result strings are
|
||
also not qualified and are considered relative to
|
||
DEFAULT-PACKAGE-NAME.
|
||
|
||
Which symbols are candidates for matching depends on the symbol
|
||
designator's format. The cases are as follows:
|
||
FOO - Symbols accessible in the buffer package.
|
||
PKG:FOO - Symbols external in package PKG.
|
||
PKG::FOO - Symbols accessible in package PKG."
|
||
;; For Emacs we allow both NIL and 0 as value of TIME-LIMIT-IN-MSEC
|
||
;; to denote an infinite time limit. Internally, we only use NIL for
|
||
;; that purpose, to be able to distinguish between "no time limit
|
||
;; alltogether" and "current time limit already exhausted." So we've
|
||
;; got to canonicalize its value at first:
|
||
(let* ((no-time-limit-p (or (not time-limit-in-msec)
|
||
(zerop time-limit-in-msec)))
|
||
(time-limit (if no-time-limit-p nil time-limit-in-msec)))
|
||
(multiple-value-bind (completion-set interrupted-p)
|
||
(fuzzy-completion-set string default-package-name :limit limit
|
||
:time-limit-in-msec time-limit)
|
||
;; We may send this as elisp [] arrays to spare a coerce here,
|
||
;; but then the network serialization were slower by handling arrays.
|
||
;; Instead we limit the number of completions that is transferred
|
||
;; (the limit is set from Emacs.)
|
||
(list (coerce completion-set 'list) interrupted-p))))
|
||
|
||
|
||
;;; A Fuzzy Matching -- Not to be confused with a fuzzy completion
|
||
;;; object that will be sent back to Emacs, as described above.
|
||
|
||
(defstruct (fuzzy-matching (:conc-name fuzzy-matching.)
|
||
(:predicate fuzzy-matching-p)
|
||
(:constructor make-fuzzy-matching
|
||
(symbol package-name score package-chunks
|
||
symbol-chunks &key (symbol-p t))))
|
||
symbol ; The symbol that has been found to match.
|
||
symbol-p ; To deffirentiate between completeing
|
||
; package: and package:nil
|
||
package-name ; The name of the package where SYMBOL was found in.
|
||
; (This is not necessarily the same as the home-package
|
||
; of SYMBOL, because the SYMBOL can be internal to
|
||
; lots of packages; also think of package nicknames.)
|
||
score ; The higher the better SYMBOL is a match.
|
||
package-chunks ; Chunks pertaining to the package identifier of SYMBOL.
|
||
symbol-chunks) ; Chunks pertaining to SYMBOL's name.
|
||
|
||
(defun %fuzzy-extract-matching-info (fuzzy-matching user-input-string)
|
||
(multiple-value-bind (_ user-package-name __ input-internal-p)
|
||
(parse-completion-arguments user-input-string nil)
|
||
(declare (ignore _ __))
|
||
(with-struct (fuzzy-matching. score symbol package-name package-chunks
|
||
symbol-chunks symbol-p)
|
||
fuzzy-matching
|
||
(let (symbol-name real-package-name internal-p)
|
||
(cond (symbol-p ; symbol fuzzy matching?
|
||
(setf symbol-name (symbol-name symbol))
|
||
(setf internal-p input-internal-p)
|
||
(setf real-package-name (cond ((keywordp symbol) "")
|
||
((not user-package-name) nil)
|
||
(t package-name))))
|
||
(t ; package fuzzy matching?
|
||
(setf symbol-name "")
|
||
(setf real-package-name package-name)
|
||
;; If no explicit package name was given by the user
|
||
;; (e.g. input was "asdf"), we want to append only
|
||
;; one colon ":" to the package names.
|
||
(setf internal-p (if user-package-name input-internal-p nil))))
|
||
(values symbol-name
|
||
real-package-name
|
||
(if user-package-name internal-p nil)
|
||
(completion-output-symbol-converter user-input-string)
|
||
(completion-output-package-converter user-input-string))))))
|
||
|
||
(defun fuzzy-format-matching (fuzzy-matching user-input-string)
|
||
"Returns the completion (\"foo:bar\") that's represented by FUZZY-MATCHING."
|
||
(multiple-value-bind (symbol-name package-name internal-p
|
||
symbol-converter package-converter)
|
||
(%fuzzy-extract-matching-info fuzzy-matching user-input-string)
|
||
(setq symbol-name (and symbol-name
|
||
(funcall symbol-converter symbol-name)))
|
||
(setq package-name (and package-name
|
||
(funcall package-converter package-name)))
|
||
(let ((result (untokenize-symbol package-name internal-p symbol-name)))
|
||
;; We return the length of the possibly added prefix as second value.
|
||
(values result (search symbol-name result)))))
|
||
|
||
(defun fuzzy-convert-matching-for-emacs (fuzzy-matching user-input-string)
|
||
"Converts a result from the fuzzy completion core into something
|
||
that emacs is expecting. Converts symbols to strings, fixes case
|
||
issues, and adds information (as a string) describing if the symbol is
|
||
bound, fbound, a class, a macro, a generic-function, a
|
||
special-operator, or a package."
|
||
(with-struct (fuzzy-matching. symbol score package-chunks symbol-chunks
|
||
symbol-p)
|
||
fuzzy-matching
|
||
(multiple-value-bind (name added-length)
|
||
(fuzzy-format-matching fuzzy-matching user-input-string)
|
||
(list name
|
||
(format nil "~,2f" score)
|
||
(append package-chunks
|
||
(mapcar (lambda (chunk)
|
||
;; Fix up chunk positions to account for possible
|
||
;; added package identifier.
|
||
(let ((offset (first chunk))
|
||
(string (second chunk)))
|
||
(list (+ added-length offset) string)))
|
||
symbol-chunks))
|
||
(if symbol-p
|
||
(symbol-classification-string symbol)
|
||
"-------p")))))
|
||
|
||
(defun fuzzy-completion-set (string default-package-name
|
||
&key limit time-limit-in-msec)
|
||
"Returns two values: an array of completion objects, sorted by
|
||
their score, that is how well they are a match for STRING
|
||
according to the fuzzy completion algorithm. If LIMIT is set,
|
||
only the top LIMIT results will be returned. Additionally, a flag
|
||
is returned that indicates whether or not TIME-LIMIT-IN-MSEC was
|
||
exhausted."
|
||
(check-type limit (or null (integer 0 #.(1- most-positive-fixnum))))
|
||
(check-type time-limit-in-msec
|
||
(or null (integer 0 #.(1- most-positive-fixnum))))
|
||
(multiple-value-bind (matchings interrupted-p)
|
||
(fuzzy-generate-matchings string default-package-name time-limit-in-msec)
|
||
(when (and limit
|
||
(> limit 0)
|
||
(< limit (length matchings)))
|
||
(if (array-has-fill-pointer-p matchings)
|
||
(setf (fill-pointer matchings) limit)
|
||
(setf matchings (make-array limit :displaced-to matchings))))
|
||
(map-into matchings #'(lambda (m)
|
||
(fuzzy-convert-matching-for-emacs m string))
|
||
matchings)
|
||
(values matchings interrupted-p)))
|
||
|
||
|
||
(defun fuzzy-generate-matchings (string default-package-name
|
||
time-limit-in-msec)
|
||
"Does all the hard work for FUZZY-COMPLETION-SET. If
|
||
TIME-LIMIT-IN-MSEC is NIL, an infinite time limit is assumed."
|
||
(multiple-value-bind (parsed-symbol-name parsed-package-name
|
||
package internal-p)
|
||
(parse-completion-arguments string default-package-name)
|
||
(flet ((fix-up (matchings parent-package-matching)
|
||
;; The components of each matching in MATCHINGS have been computed
|
||
;; relatively to PARENT-PACKAGE-MATCHING. Make them absolute.
|
||
(let* ((p parent-package-matching)
|
||
(p.name (fuzzy-matching.package-name p))
|
||
(p.score (fuzzy-matching.score p))
|
||
(p.chunks (fuzzy-matching.package-chunks p)))
|
||
(map-into
|
||
matchings
|
||
(lambda (m)
|
||
(let ((m.score (fuzzy-matching.score m)))
|
||
(setf (fuzzy-matching.package-name m) p.name)
|
||
(setf (fuzzy-matching.package-chunks m) p.chunks)
|
||
(setf (fuzzy-matching.score m)
|
||
(if (equal parsed-symbol-name "")
|
||
;; Make package matchings be sorted before all
|
||
;; the relative symbol matchings while preserving
|
||
;; over all orderness.
|
||
(/ p.score 100)
|
||
(+ p.score m.score)))
|
||
m))
|
||
matchings)))
|
||
(find-symbols (designator package time-limit &optional filter)
|
||
(fuzzy-find-matching-symbols designator package
|
||
:time-limit-in-msec time-limit
|
||
:external-only (not internal-p)
|
||
:filter (or filter #'identity)))
|
||
(find-packages (designator time-limit)
|
||
(fuzzy-find-matching-packages designator
|
||
:time-limit-in-msec time-limit))
|
||
(maybe-find-local-package (name)
|
||
(or (find-locally-nicknamed-package name *buffer-package*)
|
||
(find-package name))))
|
||
(let ((time-limit time-limit-in-msec) (symbols) (packages) (results)
|
||
(dedup-table (make-hash-table :test #'equal)))
|
||
(cond ((not parsed-package-name) ; E.g. STRING = "asd"
|
||
;; We don't know if user is searching for a package or a symbol
|
||
;; within his current package. So we try to find either.
|
||
(setf (values packages time-limit)
|
||
(find-packages parsed-symbol-name time-limit))
|
||
(setf (values symbols time-limit)
|
||
(find-symbols parsed-symbol-name package time-limit)))
|
||
((string= parsed-package-name "") ; E.g. STRING = ":" or ":foo"
|
||
(setf (values symbols time-limit)
|
||
(find-symbols parsed-symbol-name package time-limit)))
|
||
(t ; E.g. STRING = "asd:" or "asd:foo"
|
||
;; Find fuzzy matchings of the denoted package identifier part.
|
||
;; After that, find matchings for the denoted symbol identifier
|
||
;; relative to all the packages found.
|
||
(multiple-value-bind (symbol-packages rest-time-limit)
|
||
(find-packages parsed-package-name time-limit-in-msec)
|
||
;; We want to traverse the found packages in the order of
|
||
;; their score, since those with higher score presumably
|
||
;; represent better choices. (This is important because some
|
||
;; packages may never be looked at if time limit exhausts
|
||
;; during traversal.)
|
||
(setf symbol-packages
|
||
(sort symbol-packages #'fuzzy-matching-greaterp))
|
||
(loop
|
||
for package-matching across symbol-packages
|
||
for package = (maybe-find-local-package
|
||
(fuzzy-matching.package-name
|
||
package-matching))
|
||
while (or (not time-limit) (> rest-time-limit 0)) do
|
||
(multiple-value-bind (matchings remaining-time)
|
||
;; The duplication filter removes all those symbols
|
||
;; which are present in more than one package
|
||
;; match. See *FUZZY-DUPLICATE-SYMBOL-FILTER*
|
||
(find-symbols parsed-symbol-name package rest-time-limit
|
||
(%make-duplicate-symbols-filter
|
||
package-matching symbol-packages dedup-table))
|
||
(setf matchings (fix-up matchings package-matching))
|
||
(setf symbols (concatenate 'vector symbols matchings))
|
||
(setf rest-time-limit remaining-time)
|
||
(let ((guessed-sort-duration
|
||
(%guess-sort-duration (length symbols))))
|
||
(when (and rest-time-limit
|
||
(<= rest-time-limit guessed-sort-duration))
|
||
(decf rest-time-limit guessed-sort-duration)
|
||
(loop-finish))))
|
||
finally
|
||
(setf time-limit rest-time-limit)
|
||
(when (equal parsed-symbol-name "") ; E.g. STRING = "asd:"
|
||
(setf packages symbol-packages))))))
|
||
;; Sort by score; thing with equal score, sort alphabetically.
|
||
;; (Especially useful when PARSED-SYMBOL-NAME is empty, and all
|
||
;; possible completions are to be returned.)
|
||
(setf results (concatenate 'vector symbols packages))
|
||
(setf results (sort results #'fuzzy-matching-greaterp))
|
||
(values results (and time-limit (<= time-limit 0)))))))
|
||
|
||
(defun %guess-sort-duration (length)
|
||
;; These numbers are pretty much arbitrary, except that they're
|
||
;; vaguely correct on my machine with SBCL. Yes, this is an ugly
|
||
;; kludge, but it's better than before (where this didn't exist at
|
||
;; all, which essentially meant, that this was taken to be 0.)
|
||
(if (zerop length)
|
||
0
|
||
(let ((comparasions (* 3.8 (* length (log length 2)))))
|
||
(* 1000 (* comparasions (expt 10 -7)))))) ; msecs
|
||
|
||
(defun %make-duplicate-symbols-filter (current-package-matching fuzzy-package-matchings dedup-table)
|
||
;; Returns a filter function based on *FUZZY-DUPLICATE-SYMBOL-FILTER*.
|
||
(case *fuzzy-duplicate-symbol-filter*
|
||
(:home-package
|
||
;; Return a filter function that takes a symbol, and which returns T
|
||
;; if and only if /no/ matching in FUZZY-PACKAGE-MATCHINGS represents
|
||
;; the home-package of the symbol passed.
|
||
(let ((packages (mapcar #'(lambda (m)
|
||
(find-package (fuzzy-matching.package-name m)))
|
||
(remove current-package-matching
|
||
(coerce fuzzy-package-matchings 'list)))))
|
||
#'(lambda (symbol)
|
||
(not (member (symbol-package symbol) packages)))))
|
||
(:nearest-package
|
||
;; Keep only the first occurence of the symbol.
|
||
#'(lambda (symbol)
|
||
(unless (gethash (symbol-name symbol) dedup-table)
|
||
(setf (gethash (symbol-name symbol) dedup-table) t))))
|
||
(:all
|
||
;; No filter
|
||
#'identity)
|
||
(t
|
||
(typecase *fuzzy-duplicate-symbol-filter*
|
||
(function
|
||
;; Custom filter
|
||
(funcall *fuzzy-duplicate-symbol-filter*
|
||
(fuzzy-matching.package-name current-package-matching)
|
||
(map 'list #'fuzzy-matching.package-name fuzzy-package-matchings)
|
||
dedup-table))
|
||
(t
|
||
;; Bad filter value
|
||
(warn "bad *FUZZY-DUPLICATE-SYMBOL-FILTER* value: ~s"
|
||
*fuzzy-duplicate-symbol-filter*)
|
||
#'identity)))))
|
||
|
||
(defun fuzzy-matching-greaterp (m1 m2)
|
||
"Returns T if fuzzy-matching M1 should be sorted before M2.
|
||
Basically just the scores of the two matchings are compared, and
|
||
the match with higher score wins. For the case that the score is
|
||
equal, the one which comes alphabetically first wins."
|
||
(declare (type fuzzy-matching m1 m2))
|
||
(let ((score1 (fuzzy-matching.score m1))
|
||
(score2 (fuzzy-matching.score m2)))
|
||
(cond ((> score1 score2) t)
|
||
((< score1 score2) nil) ; total order
|
||
(t
|
||
(let ((name1 (symbol-name (fuzzy-matching.symbol m1)))
|
||
(name2 (symbol-name (fuzzy-matching.symbol m2))))
|
||
(string< name1 name2))))))
|
||
|
||
(declaim (ftype (function () (integer 0)) get-real-time-msecs))
|
||
(defun get-real-time-in-msecs ()
|
||
(let ((units-per-msec (max 1 (floor internal-time-units-per-second 1000))))
|
||
(values (floor (get-internal-real-time) units-per-msec))))
|
||
|
||
(defun fuzzy-find-matching-symbols
|
||
(string package &key (filter #'identity) external-only time-limit-in-msec)
|
||
"Returns two values: a vector of fuzzy matchings for matching
|
||
symbols in PACKAGE, using the fuzzy completion algorithm, and the
|
||
remaining time limit.
|
||
|
||
Only those symbols are considered of which FILTER does return T.
|
||
|
||
If EXTERNAL-ONLY is true, only external symbols are considered. A
|
||
TIME-LIMIT-IN-MSEC of NIL is considered no limit; if it's zero or
|
||
negative, perform a NOP."
|
||
(let ((time-limit-p (and time-limit-in-msec t))
|
||
(time-limit (or time-limit-in-msec 0))
|
||
(rtime-at-start (get-real-time-in-msecs))
|
||
(package-name (package-name package))
|
||
(count 0))
|
||
(declare (type boolean time-limit-p))
|
||
(declare (type integer time-limit rtime-at-start))
|
||
(declare (type (integer 0 #.(1- most-positive-fixnum)) count))
|
||
|
||
(flet ((recompute-remaining-time (old-remaining-time)
|
||
(cond ((not time-limit-p)
|
||
;; propagate NIL back as infinite time limit
|
||
(values nil nil))
|
||
((> count 0) ; ease up on getting internal time like crazy
|
||
(setf count (mod (1+ count) 128))
|
||
(values nil old-remaining-time))
|
||
(t (let* ((elapsed-time (- (get-real-time-in-msecs)
|
||
rtime-at-start))
|
||
(remaining (- time-limit elapsed-time)))
|
||
(values (<= remaining 0) remaining)))))
|
||
(perform-fuzzy-match (string symbol-name)
|
||
(let* ((converter (completion-output-symbol-converter string))
|
||
(converted-symbol-name (funcall converter symbol-name)))
|
||
(compute-highest-scoring-completion string
|
||
converted-symbol-name))))
|
||
(let ((completions (make-array 256 :adjustable t :fill-pointer 0))
|
||
(rest-time-limit time-limit))
|
||
(do-symbols* (symbol package)
|
||
(multiple-value-bind (exhausted? remaining-time)
|
||
(recompute-remaining-time rest-time-limit)
|
||
(setf rest-time-limit remaining-time)
|
||
(cond (exhausted? (return))
|
||
((not (and (or (not external-only)
|
||
(symbol-external-p symbol package))
|
||
(funcall filter symbol))))
|
||
((string= "" string) ; "" matches always
|
||
(vector-push-extend
|
||
(make-fuzzy-matching symbol package-name
|
||
0.0 '() '())
|
||
completions))
|
||
(t
|
||
(multiple-value-bind (match-result score)
|
||
(perform-fuzzy-match string (symbol-name symbol))
|
||
(when match-result
|
||
(vector-push-extend
|
||
(make-fuzzy-matching symbol package-name score
|
||
'() match-result)
|
||
completions)))))))
|
||
(values completions rest-time-limit)))))
|
||
|
||
(defun fuzzy-find-matching-packages (name &key time-limit-in-msec)
|
||
"Returns a vector of fuzzy matchings for each package that is
|
||
similiar to NAME, and the remaining time limit.
|
||
Cf. FUZZY-FIND-MATCHING-SYMBOLS."
|
||
(let ((time-limit-p (and time-limit-in-msec t))
|
||
(time-limit (or time-limit-in-msec 0))
|
||
(rtime-at-start (get-real-time-in-msecs))
|
||
(converter (completion-output-package-converter name))
|
||
(completions (make-array 32 :adjustable t :fill-pointer 0)))
|
||
(declare (type boolean time-limit-p))
|
||
(declare (type integer time-limit rtime-at-start))
|
||
(declare (type function converter))
|
||
(flet ((match-package (names)
|
||
(loop with max-pkg-name = ""
|
||
with max-result = nil
|
||
with max-score = 0
|
||
for package-name in names
|
||
for converted-name = (funcall converter package-name)
|
||
do
|
||
(multiple-value-bind (result score)
|
||
(compute-highest-scoring-completion name
|
||
converted-name)
|
||
(when (and result (> score max-score))
|
||
(setf max-pkg-name package-name)
|
||
(setf max-result result)
|
||
(setf max-score score)))
|
||
finally
|
||
(when max-result
|
||
(vector-push-extend
|
||
(make-fuzzy-matching nil max-pkg-name
|
||
max-score max-result '()
|
||
:symbol-p nil)
|
||
completions)))))
|
||
(cond ((and time-limit-p (<= time-limit 0))
|
||
(values #() time-limit))
|
||
(t
|
||
(loop for (nick) in (package-local-nicknames *buffer-package*)
|
||
do
|
||
(match-package (list nick)))
|
||
(loop for package in (list-all-packages)
|
||
do
|
||
;; Find best-matching package-nickname:
|
||
(match-package (package-names package))
|
||
finally
|
||
(return
|
||
(values completions
|
||
(and time-limit-p
|
||
(let ((elapsed-time (- (get-real-time-in-msecs)
|
||
rtime-at-start)))
|
||
(- time-limit elapsed-time)))))))))))
|
||
|
||
|
||
(defslimefun fuzzy-completion-selected (original-string completion)
|
||
"This function is called by Slime when a fuzzy completion is
|
||
selected by the user. It is for future expansion to make
|
||
testing, say, a machine learning algorithm for completion scoring
|
||
easier.
|
||
|
||
ORIGINAL-STRING is the string the user completed from, and
|
||
COMPLETION is the completion object (see docstring for
|
||
SWANK:FUZZY-COMPLETIONS) corresponding to the completion that the
|
||
user selected."
|
||
(declare (ignore original-string completion))
|
||
nil)
|
||
|
||
|
||
;;;;; Fuzzy completion core
|
||
|
||
(defparameter *fuzzy-recursion-soft-limit* 30
|
||
"This is a soft limit for recursion in
|
||
RECURSIVELY-COMPUTE-MOST-COMPLETIONS. Without this limit,
|
||
completing a string such as \"ZZZZZZ\" with a symbol named
|
||
\"ZZZZZZZZZZZZZZZZZZZZZZZ\" will result in explosive recursion to
|
||
find all the ways it can match.
|
||
|
||
Most natural language searches and symbols do not have this
|
||
problem -- this is only here as a safeguard.")
|
||
(declaim (fixnum *fuzzy-recursion-soft-limit*))
|
||
|
||
(defvar *all-chunks* '())
|
||
(declaim (type list *all-chunks*))
|
||
|
||
(defun compute-highest-scoring-completion (short full)
|
||
"Finds the highest scoring way to complete the abbreviation
|
||
SHORT onto the string FULL, using CHAR= as a equality function for
|
||
letters. Returns two values: The first being the completion
|
||
chunks of the highest scorer, and the second being the score."
|
||
(let* ((scored-results
|
||
(mapcar #'(lambda (result)
|
||
(cons (score-completion result short full) result))
|
||
(compute-most-completions short full)))
|
||
(winner (first (sort scored-results #'> :key #'first))))
|
||
(values (rest winner) (first winner))))
|
||
|
||
(defun compute-most-completions (short full)
|
||
"Finds most possible ways to complete FULL with the letters in SHORT.
|
||
Calls RECURSIVELY-COMPUTE-MOST-COMPLETIONS recursively. Returns
|
||
a list of (&rest CHUNKS), where each CHUNKS is a description of
|
||
how a completion matches."
|
||
(let ((*all-chunks* nil))
|
||
(recursively-compute-most-completions short full 0 0 nil nil nil t)
|
||
*all-chunks*))
|
||
|
||
(defun recursively-compute-most-completions
|
||
(short full
|
||
short-index initial-full-index
|
||
chunks current-chunk current-chunk-pos
|
||
recurse-p)
|
||
"Recursively (if RECURSE-P is true) find /most/ possible ways
|
||
to fuzzily map the letters in SHORT onto FULL, using CHAR= to
|
||
determine if two letters match.
|
||
|
||
A chunk is a list of elements that have matched consecutively.
|
||
When consecutive matches stop, it is coerced into a string,
|
||
paired with the starting position of the chunk, and pushed onto
|
||
CHUNKS.
|
||
|
||
Whenever a letter matches, if RECURSE-P is true,
|
||
RECURSIVELY-COMPUTE-MOST-COMPLETIONS calls itself with a position
|
||
one index ahead, to find other possibly higher scoring
|
||
possibilities. If there are less than
|
||
*FUZZY-RECURSION-SOFT-LIMIT* results in *ALL-CHUNKS* currently,
|
||
this call will also recurse.
|
||
|
||
Once a word has been completely matched, the chunks are pushed
|
||
onto the special variable *ALL-CHUNKS* and the function returns."
|
||
(declare (optimize speed)
|
||
(type fixnum short-index initial-full-index)
|
||
(type list current-chunk)
|
||
(simple-string short full))
|
||
(flet ((short-cur ()
|
||
"Returns the next letter from the abbreviation, or NIL
|
||
if all have been used."
|
||
(if (= short-index (length short))
|
||
nil
|
||
(aref short short-index)))
|
||
(add-to-chunk (char pos)
|
||
"Adds the CHAR at POS in FULL to the current chunk,
|
||
marking the start position if it is empty."
|
||
(unless current-chunk
|
||
(setf current-chunk-pos pos))
|
||
(push char current-chunk))
|
||
(collect-chunk ()
|
||
"Collects the current chunk to CHUNKS and prepares for
|
||
a new chunk."
|
||
(when current-chunk
|
||
(let ((current-chunk-as-string
|
||
(nreverse
|
||
(make-array (length current-chunk)
|
||
:element-type 'character
|
||
:initial-contents current-chunk))))
|
||
(push (list current-chunk-pos current-chunk-as-string) chunks)
|
||
(setf current-chunk nil
|
||
current-chunk-pos nil)))))
|
||
;; If there's an outstanding chunk coming in collect it. Since
|
||
;; we're recursively called on skipping an input character, the
|
||
;; chunk can't possibly continue on.
|
||
(when current-chunk (collect-chunk))
|
||
(do ((pos initial-full-index (1+ pos)))
|
||
((= pos (length full)))
|
||
(let ((cur-char (aref full pos)))
|
||
(if (and (short-cur)
|
||
(char= cur-char (short-cur)))
|
||
(progn
|
||
(when recurse-p
|
||
;; Try other possibilities, limiting insanely deep
|
||
;; recursion somewhat.
|
||
(recursively-compute-most-completions
|
||
short full short-index (1+ pos)
|
||
chunks current-chunk current-chunk-pos
|
||
(not (> (length *all-chunks*)
|
||
*fuzzy-recursion-soft-limit*))))
|
||
(incf short-index)
|
||
(add-to-chunk cur-char pos))
|
||
(collect-chunk))))
|
||
(collect-chunk)
|
||
;; If we've exhausted the short characters we have a match.
|
||
(if (short-cur)
|
||
nil
|
||
(let ((rev-chunks (reverse chunks)))
|
||
(push rev-chunks *all-chunks*)
|
||
rev-chunks))))
|
||
|
||
|
||
;;;;; Fuzzy completion scoring
|
||
|
||
(defvar *fuzzy-completion-symbol-prefixes* "*+-%&?<"
|
||
"Letters that are likely to be at the beginning of a symbol.
|
||
Letters found after one of these prefixes will be scored as if
|
||
they were at the beginning of ths symbol.")
|
||
(defvar *fuzzy-completion-symbol-suffixes* "*+->"
|
||
"Letters that are likely to be at the end of a symbol.
|
||
Letters found before one of these suffixes will be scored as if
|
||
they were at the end of the symbol.")
|
||
(defvar *fuzzy-completion-word-separators* "-/."
|
||
"Letters that separate different words in symbols. Letters
|
||
after one of these symbols will be scores more highly than other
|
||
letters.")
|
||
|
||
(defun score-completion (completion short full)
|
||
"Scores the completion chunks COMPLETION as a completion from
|
||
the abbreviation SHORT to the full string FULL. COMPLETION is a
|
||
list like:
|
||
((0 \"mul\") (9 \"v\") (15 \"b\"))
|
||
Which, if SHORT were \"mulvb\" and full were \"multiple-value-bind\",
|
||
would indicate that it completed as such (completed letters
|
||
capitalized):
|
||
MULtiple-Value-Bind
|
||
|
||
Letters are given scores based on their position in the string.
|
||
Letters at the beginning of a string or after a prefix letter at
|
||
the beginning of a string are scored highest. Letters after a
|
||
word separator such as #\- are scored next highest. Letters at
|
||
the end of a string or before a suffix letter at the end of a
|
||
string are scored medium, and letters anywhere else are scored
|
||
low.
|
||
|
||
If a letter is directly after another matched letter, and its
|
||
intrinsic value in that position is less than a percentage of the
|
||
previous letter's value, it will use that percentage instead.
|
||
|
||
Finally, a small scaling factor is applied to favor shorter
|
||
matches, all other things being equal."
|
||
(labels ((at-beginning-p (pos)
|
||
(= pos 0))
|
||
(after-prefix-p (pos)
|
||
(and (= pos 1)
|
||
(find (aref full 0) *fuzzy-completion-symbol-prefixes*)))
|
||
(word-separator-p (pos)
|
||
(find (aref full pos) *fuzzy-completion-word-separators*))
|
||
(after-word-separator-p (pos)
|
||
(find (aref full (1- pos)) *fuzzy-completion-word-separators*))
|
||
(at-end-p (pos)
|
||
(= pos (1- (length full))))
|
||
(before-suffix-p (pos)
|
||
(and (= pos (- (length full) 2))
|
||
(find (aref full (1- (length full)))
|
||
*fuzzy-completion-symbol-suffixes*)))
|
||
(score-or-percentage-of-previous (base-score pos chunk-pos)
|
||
(if (zerop chunk-pos)
|
||
base-score
|
||
(max base-score
|
||
(+ (* (score-char (1- pos) (1- chunk-pos)) 0.85)
|
||
(expt 1.2 chunk-pos)))))
|
||
(score-char (pos chunk-pos)
|
||
(score-or-percentage-of-previous
|
||
(cond ((at-beginning-p pos) 10)
|
||
((after-prefix-p pos) 10)
|
||
((word-separator-p pos) 1)
|
||
((after-word-separator-p pos) 8)
|
||
((at-end-p pos) 6)
|
||
((before-suffix-p pos) 6)
|
||
(t 1))
|
||
pos chunk-pos))
|
||
(score-chunk (chunk)
|
||
(loop for chunk-pos below (length (second chunk))
|
||
for pos from (first chunk)
|
||
summing (score-char pos chunk-pos))))
|
||
(let* ((chunk-scores (mapcar #'score-chunk completion))
|
||
(length-score (/ 10.0 (1+ (- (length full) (length short))))))
|
||
(values
|
||
(+ (reduce #'+ chunk-scores) length-score)
|
||
(list (mapcar #'list chunk-scores completion) length-score)))))
|
||
|
||
(defun highlight-completion (completion full)
|
||
"Given a chunk definition COMPLETION and the string FULL,
|
||
HIGHLIGHT-COMPLETION will create a string that demonstrates where
|
||
the completion matched in the string. Matches will be
|
||
capitalized, while the rest of the string will be lower-case."
|
||
(let ((highlit (nstring-downcase (copy-seq full))))
|
||
(dolist (chunk completion)
|
||
(setf highlit (nstring-upcase highlit
|
||
:start (first chunk)
|
||
:end (+ (first chunk)
|
||
(length (second chunk))))))
|
||
highlit))
|
||
|
||
(defun format-fuzzy-completion-set (winners)
|
||
"Given a list of completion objects such as on returned by
|
||
FUZZY-COMPLETION-SET, format the list into user-readable output
|
||
for interactive debugging purpose."
|
||
(let ((max-len
|
||
(loop for winner in winners maximizing (length (first winner)))))
|
||
(loop for (sym score result) in winners do
|
||
(format t "~&~VA score ~8,2F ~A"
|
||
max-len (highlight-completion result sym) score result))))
|
||
|
||
(provide :swank-fuzzy)
|