Hemant Vishwakarma: How does Sublime's View.classify work?

I'd like to figure out how Sublime's View.classify routine works internally. Here's an MCVE to play with:

import re
import textwrap

import sublime
import sublime_plugin
from sublime import Region

# Bit flags used by the classify routines below. Every flag occupies its own
# bit, so any combination can be OR-ed into a single integer result.
CLASS_WORD_START = 1 << 0
CLASS_WORD_END = 1 << 1
CLASS_PUNCTUATION_START = 1 << 2
CLASS_PUNCTUATION_END = 1 << 3
CLASS_SUB_WORD_START = 1 << 4
CLASS_SUB_WORD_END = 1 << 5
CLASS_LINE_START = 1 << 6
CLASS_LINE_END = 1 << 7
CLASS_EMPTY_LINE = 1 << 8
CLASS_MIDDLE_WORD = 1 << 9
CLASS_WORD_START_WITH_PUNCTUATION = 1 << 10
CLASS_WORD_END_WITH_PUNCTUATION = 1 << 11
CLASS_OPENING_PARENTHESIS = 1 << 12
CLASS_CLOSING_PARENTHESIS = 1 << 13


class PythonVsSublimeCommand(sublime_plugin.TextCommand):
    """Compare a pure-Python clone of ``View.classify`` with the built-in one.

    Running the command classifies every point of the view with both
    implementations, prints each mismatch to the console and selects the
    characters at which the two results differ.
    """

    # (flag, name) pairs in ascending bit order, used to pretty-print a result.
    _FLAG_NAMES = (
        (CLASS_WORD_START, "CLASS_WORD_START"),
        (CLASS_WORD_END, "CLASS_WORD_END"),
        (CLASS_PUNCTUATION_START, "CLASS_PUNCTUATION_START"),
        (CLASS_PUNCTUATION_END, "CLASS_PUNCTUATION_END"),
        (CLASS_SUB_WORD_START, "CLASS_SUB_WORD_START"),
        (CLASS_SUB_WORD_END, "CLASS_SUB_WORD_END"),
        (CLASS_LINE_START, "CLASS_LINE_START"),
        (CLASS_LINE_END, "CLASS_LINE_END"),
        (CLASS_EMPTY_LINE, "CLASS_EMPTY_LINE"),
        (CLASS_MIDDLE_WORD, "CLASS_MIDDLE_WORD"),
        (CLASS_WORD_START_WITH_PUNCTUATION, "CLASS_WORD_START_WITH_PUNCTUATION"),
        (CLASS_WORD_END_WITH_PUNCTUATION, "CLASS_WORD_END_WITH_PUNCTUATION"),
        (CLASS_OPENING_PARENTHESIS, "CLASS_OPENING_PARENTHESIS"),
        (CLASS_CLOSING_PARENTHESIS, "CLASS_CLOSING_PARENTHESIS"),
    )

    # Result for an empty view or a point outside [0, view.size()].
    # This is the flag spelling of the literal 3520 used previously.
    _OUT_OF_RANGE = (
        CLASS_LINE_START
        | CLASS_LINE_END
        | CLASS_EMPTY_LINE
        | CLASS_WORD_START_WITH_PUNCTUATION
        | CLASS_WORD_END_WITH_PUNCTUATION
    )

    # Patterns are compiled once here instead of on every classify() call,
    # because run() calls classify() for each character of the view.
    #
    # NOTE: the separator class is hard-coded; it should eventually be taken
    # from the word_separator settings. If that is done, an empty separator
    # list needs explicit handling (the original code guarded every use with
    # `ws != ""`; see https://github.com/limetext/rubex/issues/2).
    _SEPARATOR_RE = re.compile(r"[-[\]!\"#$%&'()*+,./:;<=>?@\\^`{|}~]")
    _UPPERCASE_RE = re.compile("[A-Z]")
    _WORD_RE = re.compile(r"\w")
    _SPACE_RE = re.compile(r"\s")
    # The "[" is escaped so newer Python versions do not warn about a
    # possible nested set; the matched characters are unchanged.
    _OPENING_RE = re.compile(r"[\[({]")
    _CLOSING_RE = re.compile(r"[)\]}]")

    def class_flags(self, flags):
        """Return the names of the flags set in *flags*.

        The names are joined with " | " and listed from the highest bit to
        the lowest. An empty string is returned when no known flag is set.
        """
        return " | ".join(
            name for flag, name in reversed(self._FLAG_NAMES) if flags & flag
        )

    def classify(self, point):
        """Classify *point*, returning a bitwise OR of zero or more flags.

        The result is derived from the character immediately before the
        point (``a``) and the one immediately after it (``b``); either is
        the empty string at the corresponding edge of the buffer.
        """
        view = self.view
        size = view.size()

        # Out of range: decided before touching the buffer.
        if size == 0 or point < 0 or point > size:
            return self._OUT_OF_RANGE

        a = view.substr(Region(point - 1, point)) if point > 0 else ""
        b = view.substr(Region(point, point + 1)) if point < size else ""

        sep_a = bool(self._SEPARATOR_RE.match(a))
        sep_b = bool(self._SEPARATOR_RE.match(b))

        # If before and after the point are the same separator return 0
        if a == b and sep_a:
            return 0

        upper_a = bool(self._UPPERCASE_RE.match(a))
        upper_b = bool(self._UPPERCASE_RE.match(b))
        word_a = bool(self._WORD_RE.match(a))
        word_b = bool(self._WORD_RE.match(b))
        space_a = bool(self._SPACE_RE.match(a))
        space_b = bool(self._SPACE_RE.match(b))

        res = 0

        # SubWord start & end
        if upper_b and not upper_a:
            res |= CLASS_SUB_WORD_START | CLASS_SUB_WORD_END
        if a == "_" and b != "_":
            res |= CLASS_SUB_WORD_START
        if b == "_" and a != "_":
            res |= CLASS_SUB_WORD_END

        # Punctuation start & end
        if (sep_b or b == "") and not sep_a:
            res |= CLASS_PUNCTUATION_START
        if (sep_a or a == "") and not sep_b:
            res |= CLASS_PUNCTUATION_END

        # Word start & end
        if word_b and (sep_a or space_a or a == ""):
            res |= CLASS_WORD_START
        if word_a and (sep_b or space_b or b == ""):
            res |= CLASS_WORD_END

        # Line start & end
        if a == "\n" or a == "":
            res |= CLASS_LINE_START
        if b == "\n" or b == "":
            res |= CLASS_LINE_END

        # Empty line
        if (a == "\n" and b == "\n") or (a == "" and b == ""):
            res |= CLASS_EMPTY_LINE

        # Middle word
        if word_a and word_b:
            res |= CLASS_MIDDLE_WORD

        # Word start & end with punctuation
        if (res & CLASS_PUNCTUATION_START) and (space_a or a == ""):
            res |= CLASS_WORD_START_WITH_PUNCTUATION
        if (res & CLASS_PUNCTUATION_END) and (space_b or b == ""):
            res |= CLASS_WORD_END_WITH_PUNCTUATION

        # Opening & closing parentheses
        if self._OPENING_RE.match(a) or self._OPENING_RE.match(b):
            res |= CLASS_OPENING_PARENTHESIS
        if self._CLOSING_RE.match(a) or self._CLOSING_RE.match(b):
            res |= CLASS_CLOSING_PARENTHESIS

        # TODO: isn't this a bug? what's the relation between
        # ',' and parentheses
        if a == ",":
            res |= CLASS_OPENING_PARENTHESIS
        if b == ",":
            res |= CLASS_CLOSING_PARENTHESIS

        return res

    def run(self, edit, block=False):
        """Select and report every point where the two classifiers disagree.

        *block* is accepted for compatibility with existing key bindings
        but is not used.
        """
        view = self.view
        selection = view.sel()
        selection.clear()

        for i in range(view.size()):
            c1 = self.classify(i)
            c2 = view.classify(i)
            if c1 == c2:
                continue

            print("Mismatch position {} - {} => {}/{} vs {}/{}".format(
                i, view.substr(i),
                c1, self.class_flags(c1),
                c2, self.class_flags(c2),
            ))
            selection.add(Region(i, i + 1))

As you can see, I've already attempted to create a clone of View.classify, but unfortunately its behaviour is not quite the same as Sublime's. If you bind the above command and run it on any view, you'll see the differences between the two.

QUESTION: How can I adjust the routine so that it matches Sublime's 1:1?

P.S. The routine I've posted above is a transpilation of Limetext's; you can find it here

from How does Sublime's View.classify work?

Hemant Vishwakarma

Thursday, 18 April 2019

How does Sublime's View.classify work?

No comments:

Post a Comment