All parsing is now done in one sweep and cached to allow details to be obtained without re-parsing. A text can be manually parsed with parse_text(text) which also updates the cache.

This commit is contained in:
Ian Thompson 2008-07-18 11:00:34 +00:00
parent 062643eefd
commit b205cf34b4
3 changed files with 405 additions and 185 deletions

@ -5,12 +5,37 @@ from tokenize import generate_tokens, TokenError
# TODO: Remove the dependency for a full Python installation. # TODO: Remove the dependency for a full Python installation.
class ClassDesc():
	"""Describes a class discovered while parsing a script: its name, the
	methods found inside it (defs) and the instance variables assigned via
	'self.???' (vars).
	"""

	def __init__(self, name, defs, vars):
		# Keep the parsed details exactly as supplied by the parser.
		self.name, self.defs, self.vars = name, defs, vars
class ScriptDesc():
	"""Full description of a parsed script: the modules it imports, the
	classes, defs and variables it declares, and whether parsing finished.

	'time' records when the descriptor was last refreshed (0 = never); call
	set_time() after a successful parse to stamp it.
	"""

	def __init__(self, name, imports, classes, defs, vars, incomplete=False):
		self.name, self.imports, self.classes = name, imports, classes
		self.defs, self.vars = defs, vars
		self.incomplete = incomplete
		# Not yet timestamped; set_time() is called once parsing succeeds.
		self.time = 0

	def set_time(self):
		# Stamp the descriptor with the current wall-clock time.
		self.time = time()
# Context types # Context types
UNSET = -1
NORMAL = 0 NORMAL = 0
SINGLE_QUOTE = 1 SINGLE_QUOTE = 1
DOUBLE_QUOTE = 2 DOUBLE_QUOTE = 2
COMMENT = 3 COMMENT = 3
# Special period constants
AUTO = -1
# Python keywords # Python keywords
KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global', KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global',
'or', 'with', 'assert', 'else', 'if', 'pass', 'yield', 'or', 'with', 'assert', 'else', 'if', 'pass', 'yield',
@ -18,13 +43,318 @@ KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global',
'raise', 'continue', 'finally', 'is', 'return', 'def', 'for', 'raise', 'continue', 'finally', 'is', 'return', 'def', 'for',
'lambda', 'try' ] 'lambda', 'try' ]
# Used to cache the return value of generate_tokens
_token_cache = None
_cache_update = 0
ModuleType = type(__builtin__) ModuleType = type(__builtin__)
NoneScriptDesc = ScriptDesc('', dict(), dict(), dict(), dict(), True)
_modules = dict([(n, None) for n in sys.builtin_module_names]) _modules = dict([(n, None) for n in sys.builtin_module_names])
_modules_updated = 0 _modules_updated = 0
_parse_cache = dict()
def get_cached_descriptor(txt, period=AUTO):
	"""Returns the cached ScriptDesc for the specified Text object 'txt'. If the
	script has not been parsed in the last 'period' seconds, or its previous
	parse was incomplete, it will be reparsed to obtain this descriptor.

	Specifying AUTO for the period (default) will choose a period based on the
	size of the Text object: the period doubles for each factor-of-four growth
	in line count, so larger texts are parsed less often.

	If parsing fails, the stale cache entry (if any) is evicted and the shared
	NoneScriptDesc placeholder is returned.
	"""
	if period == AUTO:
		# period = 2**floor(log4(nlines)): scale refresh interval with size.
		m = txt.nlines
		r = 1
		while True:
			m = m >> 2
			if not m: break
			r = r << 1
		period = r

	key = hash(txt)

	parse = True
	if key in _parse_cache:
		desc = _parse_cache[key]
		if desc.time >= time() - period:
			# Cached copy is fresh; only reparse if it was left incomplete.
			parse = desc.incomplete

	if parse:
		try:
			desc = parse_text(txt)
		except Exception:
			# Was a bare 'except:' which also swallowed KeyboardInterrupt /
			# SystemExit; narrowed to Exception. Evict the stale entry so a
			# later call retries from scratch, and fall back to the shared
			# empty descriptor.
			if key in _parse_cache:
				del _parse_cache[key]
			desc = NoneScriptDesc

	return desc
def parse_text(txt):
	"""Parses an entire script's text and returns a ScriptDesc instance
	containing information about the script: its imports, classes, defs and
	global variable names.

	If the text is not a valid Python script a TokenError will be thrown.
	Currently this means leaving brackets open will result in the script
	failing to complete (the descriptor is then marked incomplete and holds
	whatever was gathered before the failure).

	The descriptor is timestamped and stored in the module parse cache under
	hash(txt) before being returned.
	"""
	txt.reset()
	tokens = generate_tokens(txt.readline)  # Throws TokenError

	# NOTE(review): the cursor position is not used by the sweep below; kept
	# because getCursorPos() is presumed side-effect free -- confirm.
	curl, cursor = txt.getCursorPos()
	linen = curl + 1  # Token line numbers are one-based

	imports = dict()     # import symbol -> module (or module attribute)
	imp_step = 0         # import-statement state machine position

	classes = dict()     # class name -> ClassDesc
	cls_step = 0         # class-statement state machine position

	defs = dict()        # global def name -> list of parameter names
	def_step = 0         # def-statement state machine position

	vars = dict()        # global variable name -> True
	var_step = 0         # class/def variable state machine position
	var_accum = dict()   # names pending a tuple/list or 'for' assignment
	var_forflag = False  # currently inside a 'for ... in' target list

	indent = 0           # current block nesting depth
	incomplete = False   # set when tokenization aborts early

	try:
		for type, string, start, end, line in tokens:

			#################
			## Indentation ##
			#################

			if type == tokenize.INDENT:
				indent += 1
			elif type == tokenize.DEDENT:
				indent -= 1

			#########################
			## Module importing... ##
			#########################

			imp_store = False

			# Default, look for 'from' or 'import' to start
			if imp_step == 0:
				if string == 'from':
					imp_tmp = []
					imp_step = 1
				elif string == 'import':
					imp_from = None
					imp_tmp = []
					imp_step = 2

			# Found a 'from', create imp_from in form '???.???...'
			elif imp_step == 1:
				if string == 'import':
					imp_from = '.'.join(imp_tmp)
					imp_tmp = []
					imp_step = 2
				elif type == tokenize.NAME:
					imp_tmp.append(string)
				elif string != '.':
					imp_step = 0  # Invalid syntax

			# Found 'import', imp_from is populated or None, create imp_name
			elif imp_step == 2:
				if string == 'as':
					imp_name = '.'.join(imp_tmp)
					imp_step = 3
				elif type == tokenize.NAME or string == '*':
					imp_tmp.append(string)
				elif string != '.':
					imp_name = '.'.join(imp_tmp)
					imp_symb = imp_name
					imp_store = True

			# Found 'as', change imp_symb to this value and go back to step 2
			elif imp_step == 3:
				if type == tokenize.NAME:
					imp_symb = string
				else:
					imp_store = True

			# Both imp_name and imp_symb have now been populated so we can import
			if imp_store:

				# Handle special case of 'import *'
				if imp_name == '*':
					parent = get_module(imp_from)
					imports.update(parent.__dict__)

				else:
					# Try importing the name as a module
					try:
						if imp_from:
							module = get_module(imp_from +'.'+ imp_name)
						else:
							module = get_module(imp_name)
						imports[imp_symb] = module
					except (ImportError, ValueError, AttributeError, TypeError):
						# Try importing name as an attribute of the parent
						try:
							module = __import__(imp_from, globals(), locals(), [imp_name])
							imports[imp_symb] = getattr(module, imp_name)
						except (ImportError, ValueError, AttributeError, TypeError):
							pass

				# More to import from the same module?
				if string == ',':
					imp_tmp = []
					imp_step = 2
				else:
					imp_step = 0

			###################
			## Class parsing ##
			###################

			# If we are inside a class then def and variable parsing should be
			# done for the class. Otherwise the definitions are considered global.

			# Look for 'class'
			if cls_step == 0:
				if string == 'class':
					cls_name = None
					cls_indent = indent
					cls_step = 1

			# Found 'class', look for cls_name followed by ':'
			elif cls_step == 1:
				if not cls_name:
					if type == tokenize.NAME:
						cls_name = string
						cls_sline = False
						cls_defs = dict()
						cls_vars = dict()
				elif string == ':':
					cls_step = 2

			# Found 'class' name ... ':', now check if it's a single line statement
			elif cls_step == 2:
				if type == tokenize.NEWLINE:
					cls_sline = False
					cls_step = 3
				elif type != tokenize.COMMENT and type != tokenize.NL:
					cls_sline = True
					cls_step = 3

			# Body of the class; close it off at the terminating NEWLINE (single
			# line statement) or at the DEDENT back to the class's own indent.
			elif cls_step == 3:
				if cls_sline:
					if type == tokenize.NEWLINE:
						classes[cls_name] = ClassDesc(cls_name, cls_defs, cls_vars)
						cls_step = 0
				else:
					if type == tokenize.DEDENT and indent <= cls_indent:
						classes[cls_name] = ClassDesc(cls_name, cls_defs, cls_vars)
						cls_step = 0

			#################
			## Def parsing ##
			#################

			# Look for 'def'
			if def_step == 0:
				if string == 'def':
					def_name = None
					def_step = 1

			# Found 'def', look for def_name followed by '('
			elif def_step == 1:
				if type == tokenize.NAME:
					def_name = string
					def_params = []
				elif def_name and string == '(':
					def_step = 2

			# Found 'def' name '(', now identify the parameters upto ')'
			# TODO: Handle ellipsis '...'
			elif def_step == 2:
				if type == tokenize.NAME:
					def_params.append(string)
				elif string == ')':
					if cls_step > 0:  # Parsing a class
						cls_defs[def_name] = def_params
					else:
						defs[def_name] = def_params
					def_step = 0

			##########################
			## Variable assignation ##
			##########################

			if cls_step > 0:  # Parsing a class
				# Look for 'self.???'
				if var_step == 0:
					if string == 'self':
						var_step = 1
				elif var_step == 1:
					if string == '.':
						var_name = None
						var_step = 2
					else:
						var_step = 0
				elif var_step == 2:
					if type == tokenize.NAME:
						var_name = string
						var_step = 3
				elif var_step == 3:
					if string == '=':
						cls_vars[var_name] = True
					var_step = 0

			elif def_step > 0:  # Parsing a def
				# Look for 'global ???[,???]'
				if var_step == 0:
					if string == 'global':
						var_step = 1
				elif var_step == 1:
					if type == tokenize.NAME:
						vars[string] = True
					elif string != ',' and type != tokenize.NL:
						# Fixed: was 'var_step == 0', a no-op comparison that
						# left this state machine stuck in step 1.
						var_step = 0

			else:  # In global scope
				# Look for names
				if string == 'for':
					var_accum = dict()
					var_forflag = True
				elif string == '=' or (var_forflag and string == 'in'):
					vars.update(var_accum)
					var_accum = dict()
					var_forflag = False
				elif type == tokenize.NAME:
					var_accum[string] = True
				elif not string in [',', '(', ')', '[', ']']:
					var_accum = dict()
					var_forflag = False

		# end:for

	except TokenError:
		# Tokenization aborted early (e.g. unclosed brackets); keep whatever
		# was gathered and flag the descriptor so callers reparse later.
		incomplete = True

	desc = ScriptDesc(txt.name, imports, classes, defs, vars, incomplete)
	desc.set_time()

	# Fixed: store under hash(txt) -- the key that get_cached_descriptor() and
	# print_cache_for() look up. Was hash(txt.name), so lookups never hit.
	_parse_cache[hash(txt)] = desc

	return desc
def get_modules(since=1): def get_modules(since=1):
"""Returns the set of built-in modules and any modules that have been """Returns the set of built-in modules and any modules that have been
@ -45,20 +375,6 @@ def suggest_cmp(x, y):
return cmp(x[0].upper(), y[0].upper()) return cmp(x[0].upper(), y[0].upper())
def cached_generate_tokens(txt, since=1):
	"""A caching version of generate tokens for multiple parsing of the same
	document within a given timescale.
	"""
	global _token_cache, _cache_update

	now = time()
	if _cache_update < now - since:
		# Cache is stale: re-tokenize the whole document and stamp the time.
		txt.reset()
		_token_cache = list(generate_tokens(txt.readline))
		_cache_update = now
	return _token_cache
def get_module(name): def get_module(name):
"""Returns the module specified by its name. The module itself is imported """Returns the module specified by its name. The module itself is imported
by this method and, as such, any initialization code will be executed. by this method and, as such, any initialization code will be executed.
@ -78,6 +394,7 @@ def type_char(v):
'm' if the parameter is a module 'm' if the parameter is a module
'f' if the parameter is callable 'f' if the parameter is callable
'v' if the parameter is variable or otherwise indeterminable 'v' if the parameter is variable or otherwise indeterminable
""" """
if isinstance(v, ModuleType): if isinstance(v, ModuleType):
@ -140,7 +457,8 @@ def get_context(txt):
def current_line(txt): def current_line(txt):
"""Extracts the Python script line at the cursor in the Blender Text object """Extracts the Python script line at the cursor in the Blender Text object
provided and cursor position within this line as the tuple pair (line, provided and cursor position within this line as the tuple pair (line,
cursor)""" cursor).
"""
(lineindex, cursor) = txt.getCursorPos() (lineindex, cursor) = txt.getCursorPos()
lines = txt.asLines() lines = txt.asLines()
@ -166,7 +484,8 @@ def current_line(txt):
def get_targets(line, cursor): def get_targets(line, cursor):
"""Parses a period separated string of valid names preceding the cursor and """Parses a period separated string of valid names preceding the cursor and
returns them as a list in the same order.""" returns them as a list in the same order.
"""
targets = [] targets = []
i = cursor - 1 i = cursor - 1
@ -176,6 +495,25 @@ def get_targets(line, cursor):
pre = line[i+1:cursor] pre = line[i+1:cursor]
return pre.split('.') return pre.split('.')
def get_defs(txt):
	"""Returns a dictionary which maps definition names in the source code to
	a list of their parameter names.

	The line 'def doit(one, two, three): print one' for example, results in the
	mapping 'doit' : [ 'one', 'two', 'three' ]
	"""
	# Delegate to the shared cached descriptor rather than re-parsing.
	desc = get_cached_descriptor(txt)
	return desc.defs
def get_vars(txt):
	"""Returns a dictionary of variable names found in the specified Text
	object. This method locates all names followed directly by an equal sign:
	'a = ???' or indirectly as part of a tuple/list assignment or inside a
	'for ??? in ???:' block.
	"""
	# Delegate to the shared cached descriptor rather than re-parsing.
	desc = get_cached_descriptor(txt)
	return desc.vars
def get_imports(txt): def get_imports(txt):
"""Returns a dictionary which maps symbol names in the source code to their """Returns a dictionary which maps symbol names in the source code to their
respective modules. respective modules.
@ -187,172 +525,51 @@ def get_imports(txt):
will execute any initialization code found within. will execute any initialization code found within.
""" """
# Unfortunately, generate_tokens may fail if the script leaves brackets or return get_cached_descriptor(txt).imports
# strings open or there are other syntax errors. For now we return an empty
# dictionary until an alternative parse method is implemented.
try:
tokens = cached_generate_tokens(txt)
except TokenError:
return dict()
imports = dict()
step = 0
for type, string, start, end, line in tokens:
store = False
# Default, look for 'from' or 'import' to start
if step == 0:
if string == 'from':
tmp = []
step = 1
elif string == 'import':
fromname = None
tmp = []
step = 2
# Found a 'from', create fromname in form '???.???...'
elif step == 1:
if string == 'import':
fromname = '.'.join(tmp)
tmp = []
step = 2
elif type == tokenize.NAME:
tmp.append(string)
elif string != '.':
step = 0 # Invalid syntax
# Found 'import', fromname is populated or None, create impname
elif step == 2:
if string == 'as':
impname = '.'.join(tmp)
step = 3
elif type == tokenize.NAME or string == '*':
tmp.append(string)
elif string != '.':
impname = '.'.join(tmp)
symbol = impname
store = True
# Found 'as', change symbol to this value and go back to step 2
elif step == 3:
if type == tokenize.NAME:
symbol = string
else:
store = True
# Both impname and symbol have now been populated so we can import
if store:
# Handle special case of 'import *'
if impname == '*':
parent = get_module(fromname)
imports.update(parent.__dict__)
else:
# Try importing the name as a module
try:
if fromname:
module = get_module(fromname +'.'+ impname)
else:
module = get_module(impname)
imports[symbol] = module
except (ImportError, ValueError, AttributeError, TypeError):
# Try importing name as an attribute of the parent
try:
module = __import__(fromname, globals(), locals(), [impname])
imports[symbol] = getattr(module, impname)
except (ImportError, ValueError, AttributeError, TypeError):
pass
# More to import from the same module?
if string == ',':
tmp = []
step = 2
else:
step = 0
return imports
def get_builtins(): def get_builtins():
"""Returns a dictionary of built-in modules, functions and variables.""" """Returns a dictionary of built-in modules, functions and variables."""
return __builtin__.__dict__ return __builtin__.__dict__
def get_defs(txt):
"""Returns a dictionary which maps definition names in the source code to
a list of their parameter names.
The line 'def doit(one, two, three): print one' for example, results in the #################################
mapping 'doit' : [ 'one', 'two', 'three' ] ## Debugging utility functions ##
#################################
def print_cache_for(txt, period=sys.maxint):
"""Prints out the data cached for a given Text object. If no period is
given the text will not be reparsed and the cached version will be returned.
Otherwise if the period has expired the text will be reparsed.
""" """
# See above for problems with generate_tokens desc = get_cached_descriptor(txt, period)
try: print '================================================'
tokens = cached_generate_tokens(txt) print 'Name:', desc.name, '('+str(hash(txt))+')'
except TokenError: print '------------------------------------------------'
return dict() print 'Defs:'
for name, params in desc.defs.items():
defs = dict() print ' ', name, params
step = 0 print '------------------------------------------------'
print 'Vars:'
for type, string, start, end, line in tokens: for name in desc.vars.keys():
print ' ', name
# Look for 'def' print '------------------------------------------------'
if step == 0: print 'Imports:'
if string == 'def': for name, item in desc.imports.items():
name = None print ' ', name.ljust(15), item
step = 1 print '------------------------------------------------'
print 'Classes:'
# Found 'def', look for name followed by '(' for clsnme, clsdsc in desc.classes.items():
elif step == 1: print ' *********************************'
if type == tokenize.NAME: print ' Name:', clsnme
name = string print ' ---------------------------------'
params = [] print ' Defs:'
elif name and string == '(': for name, params in clsdsc.defs.items():
step = 2 print ' ', name, params
print ' ---------------------------------'
# Found 'def' name '(', now identify the parameters upto ')' print ' Vars:'
# TODO: Handle ellipsis '...' for name in clsdsc.vars.keys():
elif step == 2: print ' ', name
if type == tokenize.NAME: print ' *********************************'
params.append(string) print '================================================'
elif string == ')':
defs[name] = params
step = 0
return defs
def get_vars(txt):
"""Returns a dictionary of variable names found in the specified Text
object. This method locates all names followed directly by an equal sign:
'a = ???' or indirectly as part of a tuple/list assignment or inside a
'for ??? in ???:' block.
"""
# See above for problems with generate_tokens
try:
tokens = cached_generate_tokens(txt)
except TokenError:
return []
vars = []
accum = [] # Used for tuple/list assignment
foring = False
for type, string, start, end, line in tokens:
# Look for names
if string == 'for':
foring = True
if string == '=' or (foring and string == 'in'):
vars.extend(accum)
accum = []
foring = False
elif type == tokenize.NAME:
accum.append(string)
elif not string in [',', '(', ')', '[', ']']:
accum = []
foring = False
return vars

@ -24,7 +24,7 @@ def main():
line, c = current_line(txt) line, c = current_line(txt)
# Check we are in a normal context # Check we are in a normal context
if get_context(txt) != 0: if get_context(txt) != NORMAL:
return return
pos = line.rfind('from ', 0, c) pos = line.rfind('from ', 0, c)

@ -4,8 +4,7 @@ Name: 'Member Suggest'
Blender: 246 Blender: 246
Group: 'TextPlugin' Group: 'TextPlugin'
Shortcut: 'Period' Shortcut: 'Period'
Tooltip: 'Lists members of the object preceding the cursor in the current text \ Tooltip: 'Lists members of the object preceding the cursor in the current text space'
space'
""" """
# Only run if we have the required modules # Only run if we have the required modules
@ -32,13 +31,14 @@ def main():
if len(pre) <= 1: if len(pre) <= 1:
return return
list = []
imports = get_imports(txt) imports = get_imports(txt)
builtins = get_builtins()
# Identify the root (root.sub.sub.) # Identify the root (root.sub.sub.)
if imports.has_key(pre[0]): if imports.has_key(pre[0]):
obj = imports[pre[0]] obj = imports[pre[0]]
elif builtins.has_key(pre[0]):
obj = builtins[pre[0]]
else: else:
return return
@ -52,9 +52,12 @@ def main():
try: try:
attr = obj.__dict__.keys() attr = obj.__dict__.keys()
if not attr:
attr = dir(obj)
except AttributeError: except AttributeError:
attr = dir(obj) attr = dir(obj)
list = []
for k in attr: for k in attr:
try: try:
v = getattr(obj, k) v = getattr(obj, k)