All parsing is now done in one sweep and cached to allow details to be obtained without re-parsing. A text can be manually parsed with parse_text(text) which also updates the cache.

This commit is contained in:
Ian Thompson 2008-07-18 11:00:34 +00:00
parent 062643eefd
commit b205cf34b4
3 changed files with 405 additions and 185 deletions

@ -5,12 +5,37 @@ from tokenize import generate_tokens, TokenError
# TODO: Remove the dependency for a full Python installation. # TODO: Remove the dependency for a full Python installation.
class ClassDesc():
	"""Describes a class discovered while parsing a script: its name, the
	methods found inside it (defs) and the instance variables assigned via
	'self.???' (vars).
	"""

	def __init__(self, name, defs, vars):
		# Keep the parsed details exactly as supplied by the parser.
		self.name, self.defs, self.vars = name, defs, vars
class ScriptDesc():
	"""Full description of a parsed script: the modules it imports, the
	classes, defs and variables it declares, and whether parsing finished.

	'time' records when the descriptor was last refreshed (0 = never); call
	set_time() after a successful parse to stamp it.
	"""

	def __init__(self, name, imports, classes, defs, vars, incomplete=False):
		self.name, self.imports, self.classes = name, imports, classes
		self.defs, self.vars = defs, vars
		self.incomplete = incomplete
		# Not yet timestamped; set_time() is called once parsing succeeds.
		self.time = 0

	def set_time(self):
		# Stamp the descriptor with the current wall-clock time.
		self.time = time()
# Context types # Context types
UNSET = -1
NORMAL = 0 NORMAL = 0
SINGLE_QUOTE = 1 SINGLE_QUOTE = 1
DOUBLE_QUOTE = 2 DOUBLE_QUOTE = 2
COMMENT = 3 COMMENT = 3
# Special period constants
AUTO = -1
# Python keywords # Python keywords
KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global', KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global',
'or', 'with', 'assert', 'else', 'if', 'pass', 'yield', 'or', 'with', 'assert', 'else', 'if', 'pass', 'yield',
@ -18,13 +43,318 @@ KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global',
'raise', 'continue', 'finally', 'is', 'return', 'def', 'for', 'raise', 'continue', 'finally', 'is', 'return', 'def', 'for',
'lambda', 'try' ] 'lambda', 'try' ]
# Used to cache the return value of generate_tokens
_token_cache = None
_cache_update = 0
ModuleType = type(__builtin__) ModuleType = type(__builtin__)
NoneScriptDesc = ScriptDesc('', dict(), dict(), dict(), dict(), True)
_modules = dict([(n, None) for n in sys.builtin_module_names]) _modules = dict([(n, None) for n in sys.builtin_module_names])
_modules_updated = 0 _modules_updated = 0
_parse_cache = dict()
def get_cached_descriptor(txt, period=AUTO):
	"""Returns the cached ScriptDesc for the specified Text object 'txt'. If the
	script has not been parsed in the last 'period' seconds, or its previous
	parse was incomplete, it will be reparsed to obtain this descriptor.

	Specifying AUTO for the period (default) will choose a period based on the
	size of the Text object: the period doubles for each factor-of-four growth
	in line count, so larger texts are parsed less often.

	If parsing fails, the stale cache entry (if any) is evicted and the shared
	NoneScriptDesc placeholder is returned.
	"""
	if period == AUTO:
		# period = 2**floor(log4(nlines)): scale refresh interval with size.
		m = txt.nlines
		r = 1
		while True:
			m = m >> 2
			if not m: break
			r = r << 1
		period = r

	key = hash(txt)

	parse = True
	if key in _parse_cache:
		desc = _parse_cache[key]
		if desc.time >= time() - period:
			# Cached copy is fresh; only reparse if it was left incomplete.
			parse = desc.incomplete

	if parse:
		try:
			desc = parse_text(txt)
		except Exception:
			# Was a bare 'except:' which also swallowed KeyboardInterrupt /
			# SystemExit; narrowed to Exception. Evict the stale entry so a
			# later call retries from scratch, and fall back to the shared
			# empty descriptor.
			if key in _parse_cache:
				del _parse_cache[key]
			desc = NoneScriptDesc

	return desc
def parse_text(txt):
	"""Parses an entire script's text and returns a ScriptDesc instance
	containing information about the script: its imports, classes, defs and
	global variable names.

	If the text is not a valid Python script a TokenError will be thrown.
	Currently this means leaving brackets open will result in the script
	failing to complete (the descriptor is then marked incomplete and holds
	whatever was gathered before the failure).

	The descriptor is timestamped and stored in the module parse cache under
	hash(txt) before being returned.
	"""
	txt.reset()
	tokens = generate_tokens(txt.readline)  # Throws TokenError

	# NOTE(review): the cursor position is not used by the sweep below; kept
	# because getCursorPos() is presumed side-effect free -- confirm.
	curl, cursor = txt.getCursorPos()
	linen = curl + 1  # Token line numbers are one-based

	imports = dict()     # import symbol -> module (or module attribute)
	imp_step = 0         # import-statement state machine position

	classes = dict()     # class name -> ClassDesc
	cls_step = 0         # class-statement state machine position

	defs = dict()        # global def name -> list of parameter names
	def_step = 0         # def-statement state machine position

	vars = dict()        # global variable name -> True
	var_step = 0         # class/def variable state machine position
	var_accum = dict()   # names pending a tuple/list or 'for' assignment
	var_forflag = False  # currently inside a 'for ... in' target list

	indent = 0           # current block nesting depth
	incomplete = False   # set when tokenization aborts early

	try:
		for type, string, start, end, line in tokens:

			#################
			## Indentation ##
			#################

			if type == tokenize.INDENT:
				indent += 1
			elif type == tokenize.DEDENT:
				indent -= 1

			#########################
			## Module importing... ##
			#########################

			imp_store = False

			# Default, look for 'from' or 'import' to start
			if imp_step == 0:
				if string == 'from':
					imp_tmp = []
					imp_step = 1
				elif string == 'import':
					imp_from = None
					imp_tmp = []
					imp_step = 2

			# Found a 'from', create imp_from in form '???.???...'
			elif imp_step == 1:
				if string == 'import':
					imp_from = '.'.join(imp_tmp)
					imp_tmp = []
					imp_step = 2
				elif type == tokenize.NAME:
					imp_tmp.append(string)
				elif string != '.':
					imp_step = 0  # Invalid syntax

			# Found 'import', imp_from is populated or None, create imp_name
			elif imp_step == 2:
				if string == 'as':
					imp_name = '.'.join(imp_tmp)
					imp_step = 3
				elif type == tokenize.NAME or string == '*':
					imp_tmp.append(string)
				elif string != '.':
					imp_name = '.'.join(imp_tmp)
					imp_symb = imp_name
					imp_store = True

			# Found 'as', change imp_symb to this value and go back to step 2
			elif imp_step == 3:
				if type == tokenize.NAME:
					imp_symb = string
				else:
					imp_store = True

			# Both imp_name and imp_symb have now been populated so we can import
			if imp_store:

				# Handle special case of 'import *'
				if imp_name == '*':
					parent = get_module(imp_from)
					imports.update(parent.__dict__)

				else:
					# Try importing the name as a module
					try:
						if imp_from:
							module = get_module(imp_from +'.'+ imp_name)
						else:
							module = get_module(imp_name)
						imports[imp_symb] = module
					except (ImportError, ValueError, AttributeError, TypeError):
						# Try importing name as an attribute of the parent
						try:
							module = __import__(imp_from, globals(), locals(), [imp_name])
							imports[imp_symb] = getattr(module, imp_name)
						except (ImportError, ValueError, AttributeError, TypeError):
							pass

				# More to import from the same module?
				if string == ',':
					imp_tmp = []
					imp_step = 2
				else:
					imp_step = 0

			###################
			## Class parsing ##
			###################

			# If we are inside a class then def and variable parsing should be
			# done for the class. Otherwise the definitions are considered global.

			# Look for 'class'
			if cls_step == 0:
				if string == 'class':
					cls_name = None
					cls_indent = indent
					cls_step = 1

			# Found 'class', look for cls_name followed by ':'
			elif cls_step == 1:
				if not cls_name:
					if type == tokenize.NAME:
						cls_name = string
						cls_sline = False
						cls_defs = dict()
						cls_vars = dict()
				elif string == ':':
					cls_step = 2

			# Found 'class' name ... ':', now check if it's a single line statement
			elif cls_step == 2:
				if type == tokenize.NEWLINE:
					cls_sline = False
					cls_step = 3
				elif type != tokenize.COMMENT and type != tokenize.NL:
					cls_sline = True
					cls_step = 3

			# Body of the class; close it off at the terminating NEWLINE (single
			# line statement) or at the DEDENT back to the class's own indent.
			elif cls_step == 3:
				if cls_sline:
					if type == tokenize.NEWLINE:
						classes[cls_name] = ClassDesc(cls_name, cls_defs, cls_vars)
						cls_step = 0
				else:
					if type == tokenize.DEDENT and indent <= cls_indent:
						classes[cls_name] = ClassDesc(cls_name, cls_defs, cls_vars)
						cls_step = 0

			#################
			## Def parsing ##
			#################

			# Look for 'def'
			if def_step == 0:
				if string == 'def':
					def_name = None
					def_step = 1

			# Found 'def', look for def_name followed by '('
			elif def_step == 1:
				if type == tokenize.NAME:
					def_name = string
					def_params = []
				elif def_name and string == '(':
					def_step = 2

			# Found 'def' name '(', now identify the parameters upto ')'
			# TODO: Handle ellipsis '...'
			elif def_step == 2:
				if type == tokenize.NAME:
					def_params.append(string)
				elif string == ')':
					if cls_step > 0:  # Parsing a class
						cls_defs[def_name] = def_params
					else:
						defs[def_name] = def_params
					def_step = 0

			##########################
			## Variable assignation ##
			##########################

			if cls_step > 0:  # Parsing a class
				# Look for 'self.???'
				if var_step == 0:
					if string == 'self':
						var_step = 1
				elif var_step == 1:
					if string == '.':
						var_name = None
						var_step = 2
					else:
						var_step = 0
				elif var_step == 2:
					if type == tokenize.NAME:
						var_name = string
						var_step = 3
				elif var_step == 3:
					if string == '=':
						cls_vars[var_name] = True
					var_step = 0

			elif def_step > 0:  # Parsing a def
				# Look for 'global ???[,???]'
				if var_step == 0:
					if string == 'global':
						var_step = 1
				elif var_step == 1:
					if type == tokenize.NAME:
						vars[string] = True
					elif string != ',' and type != tokenize.NL:
						# Fixed: was 'var_step == 0', a no-op comparison that
						# left this state machine stuck in step 1.
						var_step = 0

			else:  # In global scope
				# Look for names
				if string == 'for':
					var_accum = dict()
					var_forflag = True
				elif string == '=' or (var_forflag and string == 'in'):
					vars.update(var_accum)
					var_accum = dict()
					var_forflag = False
				elif type == tokenize.NAME:
					var_accum[string] = True
				elif not string in [',', '(', ')', '[', ']']:
					var_accum = dict()
					var_forflag = False

		# end:for

	except TokenError:
		# Tokenization aborted early (e.g. unclosed brackets); keep whatever
		# was gathered and flag the descriptor so callers reparse later.
		incomplete = True

	desc = ScriptDesc(txt.name, imports, classes, defs, vars, incomplete)
	desc.set_time()

	# Fixed: store under hash(txt) -- the key that get_cached_descriptor() and
	# print_cache_for() look up. Was hash(txt.name), so lookups never hit.
	_parse_cache[hash(txt)] = desc

	return desc
def get_modules(since=1): def get_modules(since=1):
"""Returns the set of built-in modules and any modules that have been """Returns the set of built-in modules and any modules that have been
@ -45,20 +375,6 @@ def suggest_cmp(x, y):
return cmp(x[0].upper(), y[0].upper()) return cmp(x[0].upper(), y[0].upper())
def cached_generate_tokens(txt, since=1):
	"""A caching version of generate tokens for multiple parsing of the same
	document within a given timescale.
	"""
	global _token_cache, _cache_update

	now = time()
	if _cache_update < now - since:
		# Cache is stale: re-tokenize the whole document and stamp the time.
		txt.reset()
		_token_cache = list(generate_tokens(txt.readline))
		_cache_update = now
	return _token_cache
def get_module(name): def get_module(name):
"""Returns the module specified by its name. The module itself is imported """Returns the module specified by its name. The module itself is imported
by this method and, as such, any initialization code will be executed. by this method and, as such, any initialization code will be executed.
@ -78,6 +394,7 @@ def type_char(v):
'm' if the parameter is a module 'm' if the parameter is a module
'f' if the parameter is callable 'f' if the parameter is callable
'v' if the parameter is variable or otherwise indeterminable 'v' if the parameter is variable or otherwise indeterminable
""" """
if isinstance(v, ModuleType): if isinstance(v, ModuleType):
@ -140,7 +457,8 @@ def get_context(txt):
def current_line(txt): def current_line(txt):
"""Extracts the Python script line at the cursor in the Blender Text object """Extracts the Python script line at the cursor in the Blender Text object
provided and cursor position within this line as the tuple pair (line, provided and cursor position within this line as the tuple pair (line,
cursor)""" cursor).
"""
(lineindex, cursor) = txt.getCursorPos() (lineindex, cursor) = txt.getCursorPos()
lines = txt.asLines() lines = txt.asLines()
@ -166,7 +484,8 @@ def current_line(txt):
def get_targets(line, cursor): def get_targets(line, cursor):
"""Parses a period separated string of valid names preceding the cursor and """Parses a period separated string of valid names preceding the cursor and
returns them as a list in the same order.""" returns them as a list in the same order.
"""
targets = [] targets = []
i = cursor - 1 i = cursor - 1
@ -176,6 +495,25 @@ def get_targets(line, cursor):
pre = line[i+1:cursor] pre = line[i+1:cursor]
return pre.split('.') return pre.split('.')
def get_defs(txt):
	"""Returns a dictionary which maps definition names in the source code to
	a list of their parameter names.

	The line 'def doit(one, two, three): print one' for example, results in the
	mapping 'doit' : [ 'one', 'two', 'three' ]
	"""
	# Delegate to the shared cached descriptor rather than re-parsing.
	desc = get_cached_descriptor(txt)
	return desc.defs
def get_vars(txt):
	"""Returns a dictionary of variable names found in the specified Text
	object. This method locates all names followed directly by an equal sign:
	'a = ???' or indirectly as part of a tuple/list assignment or inside a
	'for ??? in ???:' block.
	"""
	# Delegate to the shared cached descriptor rather than re-parsing.
	desc = get_cached_descriptor(txt)
	return desc.vars
def get_imports(txt): def get_imports(txt):
"""Returns a dictionary which maps symbol names in the source code to their """Returns a dictionary which maps symbol names in the source code to their
respective modules. respective modules.
@ -187,172 +525,51 @@ def get_imports(txt):
will execute any initialization code found within. will execute any initialization code found within.
""" """
# Unfortunately, generate_tokens may fail if the script leaves brackets or return get_cached_descriptor(txt).imports
# strings open or there are other syntax errors. For now we return an empty
# dictionary until an alternative parse method is implemented.
try:
tokens = cached_generate_tokens(txt)
except TokenError:
return dict()
imports = dict()
step = 0
for type, string, start, end, line in tokens:
store = False
# Default, look for 'from' or 'import' to start
if step == 0:
if string == 'from':
tmp = []
step = 1
elif string == 'import':
fromname = None
tmp = []
step = 2
# Found a 'from', create fromname in form '???.???...'
elif step == 1:
if string == 'import':
fromname = '.'.join(tmp)
tmp = []
step = 2
elif type == tokenize.NAME:
tmp.append(string)
elif string != '.':
step = 0 # Invalid syntax
# Found 'import', fromname is populated or None, create impname
elif step == 2:
if string == 'as':
impname = '.'.join(tmp)
step = 3
elif type == tokenize.NAME or string == '*':
tmp.append(string)
elif string != '.':
impname = '.'.join(tmp)
symbol = impname
store = True
# Found 'as', change symbol to this value and go back to step 2
elif step == 3:
if type == tokenize.NAME:
symbol = string
else:
store = True
# Both impname and symbol have now been populated so we can import
if store:
# Handle special case of 'import *'
if impname == '*':
parent = get_module(fromname)
imports.update(parent.__dict__)
else:
# Try importing the name as a module
try:
if fromname:
module = get_module(fromname +'.'+ impname)
else:
module = get_module(impname)
imports[symbol] = module
except (ImportError, ValueError, AttributeError, TypeError):
# Try importing name as an attribute of the parent
try:
module = __import__(fromname, globals(), locals(), [impname])
imports[symbol] = getattr(module, impname)
except (ImportError, ValueError, AttributeError, TypeError):
pass
# More to import from the same module?
if string == ',':
tmp = []
step = 2
else:
step = 0
return imports
def get_builtins(): def get_builtins():
"""Returns a dictionary of built-in modules, functions and variables.""" """Returns a dictionary of built-in modules, functions and variables."""
return __builtin__.__dict__ return __builtin__.__dict__
def get_defs(txt):
"""Returns a dictionary which maps definition names in the source code to
a list of their parameter names.
The line 'def doit(one, two, three): print one' for example, results in the #################################
mapping 'doit' : [ 'one', 'two', 'three' ] ## Debugging utility functions ##
#################################
def print_cache_for(txt, period=sys.maxint):
"""Prints out the data cached for a given Text object. If no period is
given the text will not be reparsed and the cached version will be returned.
Otherwise if the period has expired the text will be reparsed.
""" """
# See above for problems with generate_tokens desc = get_cached_descriptor(txt, period)
try: print '================================================'
tokens = cached_generate_tokens(txt) print 'Name:', desc.name, '('+str(hash(txt))+')'
except TokenError: print '------------------------------------------------'
return dict() print 'Defs:'
for name, params in desc.defs.items():
defs = dict() print ' ', name, params
step = 0 print '------------------------------------------------'
print 'Vars:'
for type, string, start, end, line in tokens: for name in desc.vars.keys():
print ' ', name
# Look for 'def' print '------------------------------------------------'
if step == 0: print 'Imports:'
if string == 'def': for name, item in desc.imports.items():
name = None print ' ', name.ljust(15), item
step = 1 print '------------------------------------------------'
print 'Classes:'
# Found 'def', look for name followed by '(' for clsnme, clsdsc in desc.classes.items():
elif step == 1: print ' *********************************'
if type == tokenize.NAME: print ' Name:', clsnme
name = string print ' ---------------------------------'
params = [] print ' Defs:'
elif name and string == '(': for name, params in clsdsc.defs.items():
step = 2 print ' ', name, params
print ' ---------------------------------'
# Found 'def' name '(', now identify the parameters upto ')' print ' Vars:'
# TODO: Handle ellipsis '...' for name in clsdsc.vars.keys():
elif step == 2: print ' ', name
if type == tokenize.NAME: print ' *********************************'
params.append(string) print '================================================'
elif string == ')':
defs[name] = params
step = 0
return defs
def get_vars(txt):
"""Returns a dictionary of variable names found in the specified Text
object. This method locates all names followed directly by an equal sign:
'a = ???' or indirectly as part of a tuple/list assignment or inside a
'for ??? in ???:' block.
"""
# See above for problems with generate_tokens
try:
tokens = cached_generate_tokens(txt)
except TokenError:
return []
vars = []
accum = [] # Used for tuple/list assignment
foring = False
for type, string, start, end, line in tokens:
# Look for names
if string == 'for':
foring = True
if string == '=' or (foring and string == 'in'):
vars.extend(accum)
accum = []
foring = False
elif type == tokenize.NAME:
accum.append(string)
elif not string in [',', '(', ')', '[', ']']:
accum = []
foring = False
return vars

@ -24,7 +24,7 @@ def main():
line, c = current_line(txt) line, c = current_line(txt)
# Check we are in a normal context # Check we are in a normal context
if get_context(txt) != 0: if get_context(txt) != NORMAL:
return return
pos = line.rfind('from ', 0, c) pos = line.rfind('from ', 0, c)

@ -4,8 +4,7 @@ Name: 'Member Suggest'
Blender: 246 Blender: 246
Group: 'TextPlugin' Group: 'TextPlugin'
Shortcut: 'Period' Shortcut: 'Period'
Tooltip: 'Lists members of the object preceding the cursor in the current text \ Tooltip: 'Lists members of the object preceding the cursor in the current text space'
space'
""" """
# Only run if we have the required modules # Only run if we have the required modules
@ -32,13 +31,14 @@ def main():
if len(pre) <= 1: if len(pre) <= 1:
return return
list = []
imports = get_imports(txt) imports = get_imports(txt)
builtins = get_builtins()
# Identify the root (root.sub.sub.) # Identify the root (root.sub.sub.)
if imports.has_key(pre[0]): if imports.has_key(pre[0]):
obj = imports[pre[0]] obj = imports[pre[0]]
elif builtins.has_key(pre[0]):
obj = builtins[pre[0]]
else: else:
return return
@ -52,9 +52,12 @@ def main():
try: try:
attr = obj.__dict__.keys() attr = obj.__dict__.keys()
if not attr:
attr = dir(obj)
except AttributeError: except AttributeError:
attr = dir(obj) attr = dir(obj)
list = []
for k in attr: for k in attr:
try: try:
v = getattr(obj, k) v = getattr(obj, k)