I'd like to figure out how Sublime's View.classify routine works internally. Here's an MCVE to play with:
import re
import textwrap
import sublime
import sublime_plugin
from sublime import Region
# Bit flags that classify() ORs together into its result. Every flag owns
# exactly one bit, written as a shift so its position is explicit.
# NOTE(review): presumably these mirror sublime.CLASS_* -- confirm against
# the Sublime Text API.
CLASS_WORD_START = 1 << 0
CLASS_WORD_END = 1 << 1
CLASS_PUNCTUATION_START = 1 << 2
CLASS_PUNCTUATION_END = 1 << 3
CLASS_SUB_WORD_START = 1 << 4
CLASS_SUB_WORD_END = 1 << 5
CLASS_LINE_START = 1 << 6
CLASS_LINE_END = 1 << 7
CLASS_EMPTY_LINE = 1 << 8
CLASS_MIDDLE_WORD = 1 << 9
CLASS_WORD_START_WITH_PUNCTUATION = 1 << 10
CLASS_WORD_END_WITH_PUNCTUATION = 1 << 11
CLASS_OPENING_PARENTHESIS = 1 << 12
CLASS_CLOSING_PARENTHESIS = 1 << 13
class PythonVsSublimeCommand(sublime_plugin.TextCommand):
    """Compare a pure-Python clone of ``View.classify`` with the built-in one.

    Running the command classifies every point of the view with both
    implementations, prints each disagreement to the console and selects the
    character at the disagreeing position.
    """

    # (name, bit) pairs in ascending bit order; class_flags() walks this table.
    _FLAG_NAMES = (
        ("CLASS_WORD_START", CLASS_WORD_START),
        ("CLASS_WORD_END", CLASS_WORD_END),
        ("CLASS_PUNCTUATION_START", CLASS_PUNCTUATION_START),
        ("CLASS_PUNCTUATION_END", CLASS_PUNCTUATION_END),
        ("CLASS_SUB_WORD_START", CLASS_SUB_WORD_START),
        ("CLASS_SUB_WORD_END", CLASS_SUB_WORD_END),
        ("CLASS_LINE_START", CLASS_LINE_START),
        ("CLASS_LINE_END", CLASS_LINE_END),
        ("CLASS_EMPTY_LINE", CLASS_EMPTY_LINE),
        ("CLASS_MIDDLE_WORD", CLASS_MIDDLE_WORD),
        ("CLASS_WORD_START_WITH_PUNCTUATION",
         CLASS_WORD_START_WITH_PUNCTUATION),
        ("CLASS_WORD_END_WITH_PUNCTUATION", CLASS_WORD_END_WITH_PUNCTUATION),
        ("CLASS_OPENING_PARENTHESIS", CLASS_OPENING_PARENTHESIS),
        ("CLASS_CLOSING_PARENTHESIS", CLASS_CLOSING_PARENTHESIS),
    )

    # Value returned for an empty view or a point outside [0, size].
    # Numerically 3520, the literal the routine used to return.
    _OUT_OF_RANGE = (
        CLASS_LINE_START
        | CLASS_LINE_END
        | CLASS_EMPTY_LINE
        | CLASS_WORD_START_WITH_PUNCTUATION
        | CLASS_WORD_END_WITH_PUNCTUATION
    )

    # Patterns are compiled once here because classify() is invoked for
    # every point of the view.
    #
    # NOTE: the separator set is hard-coded; it should be taken from the
    # word_separators setting. Because it is never empty, no guard against
    # an empty pattern is needed (the routine this was ported from had one,
    # see https://github.com/limetext/rubex/issues/2).
    _SEPARATOR = re.compile(r"[-[\]!\"#$%&'()*+,./:;<=>?@\\^`{|}~]")
    _UPPER = re.compile("[A-Z]")
    _WORD = re.compile(r"\w")
    _SPACE = re.compile(r"\s")
    # "[" is escaped so newer Python versions do not emit a
    # "possible nested set" FutureWarning; the matched characters are the
    # same as for "[[({]".
    _OPENING = re.compile(r"[\[({]")
    _CLOSING = re.compile(r"[)\]}]")

    def class_flags(self, flags):
        """Return the names of the bits set in *flags*.

        The names are joined with " | " and listed from the highest set bit
        down to the lowest. An empty string is returned when no known bit is
        set.
        """
        names = [name for name, bit in self._FLAG_NAMES if flags & bit]
        return " | ".join(reversed(names))

    def classify(self, point):
        """Classify *point*, returning a bitwise OR of zero or more flags.

        The decision is based on ``a``, the character just before the point,
        and ``b``, the character just after it; either is "" at the
        corresponding end of the buffer.

        Returns the out-of-range value (3520) when the view is empty or
        *point* lies outside ``[0, view.size()]``, and 0 when both
        neighbours are the same separator character.
        """
        view = self.view
        size = view.size()

        # Validate first so no region outside the buffer is ever read.
        if size == 0 or point < 0 or point > size:
            return self._OUT_OF_RANGE

        a = view.substr(Region(point - 1, point)) if point > 0 else ""
        b = view.substr(Region(point, point + 1)) if point < size else ""

        a_sep = bool(self._SEPARATOR.match(a))
        b_sep = bool(self._SEPARATOR.match(b))

        # The same separator on both sides of the point: nothing to report.
        if a == b and a_sep:
            return 0

        a_word = bool(self._WORD.match(a))
        b_word = bool(self._WORD.match(b))
        a_space = bool(self._SPACE.match(a))
        b_space = bool(self._SPACE.match(b))

        res = 0

        # Sub-word start & end: an upper-case letter that does not follow
        # another upper-case letter sets both flags; underscores delimit
        # sub-words as well.
        if self._UPPER.match(b) and not self._UPPER.match(a):
            res |= CLASS_SUB_WORD_START | CLASS_SUB_WORD_END
        if a == "_" and b != "_":
            res |= CLASS_SUB_WORD_START
        if b == "_" and a != "_":
            res |= CLASS_SUB_WORD_END

        # Punctuation start & end (a buffer edge counts like a separator).
        if (b_sep or b == "") and not a_sep:
            res |= CLASS_PUNCTUATION_START
        if (a_sep or a == "") and not b_sep:
            res |= CLASS_PUNCTUATION_END

        # Word start & end.
        if b_word and (a_sep or a_space or a == ""):
            res |= CLASS_WORD_START
        if a_word and (b_sep or b_space or b == ""):
            res |= CLASS_WORD_END

        # Line start & end.
        if a in ("\n", ""):
            res |= CLASS_LINE_START
        if b in ("\n", ""):
            res |= CLASS_LINE_END

        # Empty line.
        if (a == "\n" and b == "\n") or (a == "" and b == ""):
            res |= CLASS_EMPTY_LINE

        # Middle of a word.
        if a_word and b_word:
            res |= CLASS_MIDDLE_WORD

        # Word start & end with punctuation; relies on the punctuation
        # flags computed above.
        if (res & CLASS_PUNCTUATION_START) and (a_space or a == ""):
            res |= CLASS_WORD_START_WITH_PUNCTUATION
        if (res & CLASS_PUNCTUATION_END) and (b_space or b == ""):
            res |= CLASS_WORD_END_WITH_PUNCTUATION

        # Opening & closing parentheses.
        if self._OPENING.match(a) or self._OPENING.match(b):
            res |= CLASS_OPENING_PARENTHESIS
        if self._CLOSING.match(a) or self._CLOSING.match(b):
            res |= CLASS_CLOSING_PARENTHESIS

        # TODO: isn't this a bug? What is the relation between ',' and
        # parentheses? Kept as-is from the routine this was ported from.
        if a == ",":
            res |= CLASS_OPENING_PARENTHESIS
        if b == ",":
            res |= CLASS_CLOSING_PARENTHESIS

        return res

    def run(self, edit, block=False):
        """Report every point where classify() and View.classify disagree.

        The selection is cleared first; each mismatch is printed with both
        raw values and their decoded flag names, and the character at the
        mismatching position is added to the selection.

        *block* is accepted but not used.
        """
        view = self.view
        view.sel().clear()
        for i in range(view.size()):
            c1 = self.classify(i)
            c2 = view.classify(i)
            if c1 != c2:
                print("Mismatch position {} - {} => {}/{} vs {}/{}".format(
                    i, view.substr(i),
                    c1, self.class_flags(c1),
                    c2, self.class_flags(c2),
                ))
                view.sel().add(Region(i, i + 1))
As you can see, I've already attempted to create a clone of View.classify, but unfortunately its behaviour is not quite the same as Sublime's. If you bind the above command and run it on any view, you'll see the differences between the two.
QUESTION: How can I adjust the routine so that it matches Sublime's 1:1?
P.S. The routine I've posted above is a transpilation of Limetext's; you can find it here
from How does Sublime's View.classify works?
No comments:
Post a Comment