OLD | NEW |
(Empty) | |
| 1 from __future__ import absolute_import |
| 2 |
| 3 import os |
| 4 import re |
| 5 import sys |
| 6 from distutils import log |
| 7 import xml.dom.pulldom |
| 8 import shlex |
| 9 import locale |
| 10 import codecs |
| 11 import unicodedata |
| 12 import warnings |
| 13 from setuptools.compat import unicode, PY2 |
| 14 from setuptools.py31compat import TemporaryDirectory |
| 15 from xml.sax.saxutils import unescape |
| 16 |
| 17 try: |
| 18 import urlparse |
| 19 except ImportError: |
| 20 import urllib.parse as urlparse |
| 21 |
| 22 from subprocess import Popen as _Popen, PIPE as _PIPE |
| 23 |
| 24 #NOTE: Use of the command line options require SVN 1.3 or newer (December 2005) |
| 25 # and SVN 1.3 hasn't been supported by the developers since mid 2008. |
| 26 |
| 27 #subprocess is called several times with shell=(sys.platform=='win32') |
| 28 #see the following for more information: |
| 29 # http://bugs.python.org/issue8557 |
| 30 # http://stackoverflow.com/questions/5658622/ |
| 31 # python-subprocess-popen-environment-path |
| 32 |
def _run_command(args, stdout=_PIPE, stderr=_PIPE, encoding=None, stream=0):
    """
    Run an external command and return ``(returncode, decoded_output)``.

    ``stream`` picks which element of ``communicate()``'s result is
    decoded (0 is stdout, 1 is stderr).  ``encoding`` is forwarded to
    ``decode_as_string``.  If the process cannot be started (OSError),
    ``(1, '')`` is returned.
    """
    # regarding the shell argument, see: http://bugs.python.org/issue8557
    use_shell = (sys.platform == 'win32')
    try:
        process = _Popen(args, stdout=stdout, stderr=stderr, shell=use_shell)
        raw_output = process.communicate()[stream]
    except OSError:
        return 1, ''

    decoded = decode_as_string(raw_output, encoding)

    # communicate() has already waited for the process, so returncode is set
    return process.returncode, decoded
| 48 |
| 49 |
def _get_entry_schedule(entry):
    """Return the text directly inside the first <schedule> element of *entry*."""
    schedule_node = entry.getElementsByTagName('schedule')[0]
    text_parts = []
    for child in schedule_node.childNodes:
        # only direct text children count; nested elements are ignored
        if child.nodeType == child.TEXT_NODE:
            text_parts.append(child.nodeValue)
    return "".join(text_parts)
| 55 |
| 56 |
def _get_target_property(target):
    """Return the text directly inside the first <property> element of *target*."""
    property_node = target.getElementsByTagName('property')[0]
    text_parts = []
    for child in property_node.childNodes:
        # only direct text children count; nested elements are ignored
        if child.nodeType == child.TEXT_NODE:
            text_parts.append(child.nodeValue)
    return "".join(text_parts)
| 62 |
| 63 |
def _get_xml_data(decoded_str):
    """
    Return *decoded_str* in the form passed on to the XML parser.

    On Python 2 the text is encoded to UTF-8 (old versions want an
    encoded string); otherwise it is returned unchanged.
    """
    return decoded_str.encode('utf-8') if PY2 else decoded_str
| 71 |
| 72 |
def joinpath(prefix, *suffix):
    """
    Join *suffix* components onto *prefix*, dropping an empty or '.' prefix.

    With no *suffix* components the prefix is returned as-is ('' when it
    is falsy).  Previously an empty or '.' prefix with no suffix called
    ``os.path.join()`` without arguments and raised TypeError.
    """
    if not suffix:
        # nothing to append; os.path.join() needs at least one argument
        return prefix or ''
    if not prefix or prefix == '.':
        return os.path.join(*suffix)
    return os.path.join(prefix, *suffix)
| 77 |
def determine_console_encoding():
    """
    Work out which encoding to use for decoding console output.

    ``locale.getpreferredencoding()`` is tried first; if it yields nothing
    or "US-ASCII", the second item of ``locale.getdefaultlocale()`` is
    used instead.  The candidate is validated with ``codecs.lookup``.

    Returns the encoding name.  When no usable encoding is found the
    result is "utf-8" on OS X and "US-ASCII" elsewhere; a "mac-*"
    encoding reported on OS X is replaced by "utf-8".
    """
    try:
        # try for the preferred encoding
        encoding = locale.getpreferredencoding()

        # some versions of python/platforms return US-ASCII (or nothing)
        # when they cannot determine an encoding
        if not encoding or encoding == "US-ASCII":
            encoding = locale.getdefaultlocale()[1]

        if encoding:
            codecs.lookup(encoding)  # make sure the codec actually exists

    except (locale.Error, LookupError, ValueError):
        # ValueError: getdefaultlocale() raises it for locale names it
        # cannot parse ("unknown locale: ..."); treat that as "no encoding"
        # instead of failing.
        encoding = None

    is_osx = sys.platform == "darwin"
    if not encoding:
        return "utf-8" if is_osx else "US-ASCII"
    if is_osx and encoding.startswith("mac-"):
        # certain versions of python would return mac-roman as default
        # on OSX as a left over of earlier mac versions.
        return "utf-8"
    return encoding
| 104 |
# Computed once at import time; decode_as_string() falls back to this value
# when it is called without an explicit encoding.
_console_encoding = determine_console_encoding()
| 106 |
def decode_as_string(text, encoding=None):
    """
    Decode console or file output and normalise it to composed NFC form.

    *text* should be an encoded (byte) string; if it is already unicode
    no decoding takes place.  When *encoding* is None the module-level
    console encoding is used, otherwise the given encoding is used.  An
    explicit encoding is needed for SVN --xml output.

    --xml should be UTF-8 (SVN Issue 2938); the discussion on the
    Subversion DEV list from 2007 seems to indicate the same.
    """
    codec = _console_encoding if encoding is None else encoding

    # only byte strings need decoding
    decoded = text if isinstance(text, unicode) else text.decode(codec)

    return unicodedata.normalize('NFC', decoded)
| 129 |
| 130 |
def parse_dir_entries(decoded_str):
    '''
    Parse the entries from a recursive info xml.

    Returns a list of (path, kind) tuples for every <entry> whose schedule
    does not start with "delete", minus the first collected entry.
    '''
    events = xml.dom.pulldom.parseString(_get_xml_data(decoded_str))
    collected = []

    for event, node in events:
        if event != 'START_ELEMENT' or node.nodeName != 'entry':
            continue
        # pull the whole subtree in so the schedule child can be read
        events.expandNode(node)
        if _get_entry_schedule(node).startswith('delete'):
            continue
        collected.append((node.getAttribute('path'),
                          node.getAttribute('kind')))

    # do not want the root directory
    return collected[1:]
| 144 |
| 145 |
def parse_externals_xml(decoded_str, prefix=''):
    '''
    Parse a propget svn:externals xml.

    *decoded_str* is the already-decoded XML text.  *prefix* is a directory
    that is stripped from the front of each <target> path when the target
    is that directory or lies inside it.

    Returns a list of external paths, each joined onto its (stripped)
    target path.
    '''
    prefix = os.path.normcase(os.path.normpath(prefix))
    # Compare on a directory boundary so that prefix "/a/b" does not match
    # "/a/bc", and the default prefix "." does not match ".hidden".
    prefix_dir = prefix if prefix.endswith(os.sep) else prefix + os.sep

    doc = xml.dom.pulldom.parseString(_get_xml_data(decoded_str))
    externals = []

    for event, node in doc:
        if event == 'START_ELEMENT' and node.nodeName == 'target':
            doc.expandNode(node)
            path = os.path.normpath(node.getAttribute('path'))
            folded = os.path.normcase(path)

            if folded == prefix:
                # the target is the prefix directory itself
                path = ''
            elif folded.startswith(prefix_dir):
                path = path[len(prefix_dir):]

            data = _get_target_property(node)
            # data should be decoded already
            for external in parse_external_prop(data):
                externals.append(joinpath(path, external))

    return externals
| 168 |
| 169 |
def parse_external_prop(lines):
    """
    Parse the value of a retrieved svn:externals entry.

    possible token setups (with quoting and backslash escaping in later
    versions)
        URL[@#] EXT_FOLDERNAME
        [-r#] URL EXT_FOLDERNAME
        EXT_FOLDERNAME [-r#] URL

    Blank lines and comment lines (starting with '#') are skipped.
    Returns a list of normalised external folder names.
    """
    externals = []
    for line in lines.splitlines():
        line = line.lstrip()  # there might be a "\ "
        if not line or line.startswith('#'):
            # '#' lines are comments in an externals definition, not
            # externals; parsing them would produce bogus folder names
            continue

        if PY2:
            # shlex handles NULLs just fine and shlex in 2.7 tries to encode
            # as ascii automatically
            line = line.encode('utf-8')
        line = shlex.split(line)
        if PY2:
            line = [x.decode('utf-8') for x in line]

        # EXT_FOLDERNAME is either the first or last token depending on
        # where the URL falls: if the last token has a URL scheme, the
        # folder name is the first token.
        if urlparse.urlsplit(line[-1])[0]:
            external = line[0]
        else:
            external = line[-1]

        external = decode_as_string(external, encoding="utf-8")
        externals.append(os.path.normpath(external))

    return externals
| 204 |
| 205 |
def parse_prop_file(filename, key):
    """
    Return the value stored for *key* in a svn property file.

    The file is read as a sequence of "K <length>" / "V <length>" header
    lines, each followed by <length> characters of payload.  The value
    returned is the payload of the first "V" record that follows a "K"
    record whose payload equals *key*.

    Returns '' when *key* is not present.  Previously the payload of the
    last record read was returned in that case, so another property's
    value could be mistaken for the requested one.
    """
    found = False
    with open(filename, 'rt') as f:
        for line in iter(f.readline, ''):  # can't use direct iter!
            parts = line.split()
            if len(parts) != 2:
                continue
            kind, length = parts
            # the payload is consumed with read(), so it is never
            # mistaken for a header line
            data = f.read(int(length))
            if kind == 'K' and data == key:
                found = True
            elif kind == 'V' and found:
                return data

    return ''
| 224 |
| 225 |
class SvnInfo(object):
    '''
    Generic svn info object.  On its own it has little knowledge of how
    to extract information: get_entries() and get_externals() return empty
    lists here.  Use SvnInfo.load to instantiate the class matching the
    installed svn version.

    Paths are not filesystem encoded.
    '''

    @staticmethod
    def get_svn_version():
        '''Return the output of "svn --version --quiet", or '' on failure.'''
        # Temp config directory should be enough to check for repository
        # This is needed because .svn always creates .subversion and
        # some operating systems do not handle dot directory correctly.
        with TemporaryDirectory() as tempdir:
            command = ['svn', '--config-dir', tempdir, '--version', '--quiet']
            code, data = _run_command(command)

        if code != 0 or not data:
            return ''
        return data.strip()

    #svnversion return values (previous implementations return max revision)
    #   4123:4168     mixed revision working copy
    #   4168M         modified working copy
    #   4123S         switched working copy
    #   4123:4168MS   mixed revision, modified, switched working copy
    revision_re = re.compile(r'(?:([\-0-9]+):)?(\d+)([a-z]*)\s*$', re.I)

    @classmethod
    def load(cls, dirname=''):
        '''
        Build the info object appropriate for *dirname*.

        Returns a plain SvnInfo when the directory is not a working copy,
        SvnFileInfo when no usable svn 1.3+ command is found, Svn13Info for
        svn older than 1.5 and Svn15Info otherwise.
        '''
        normdir = os.path.normpath(dirname)

        # Temp config directory should be enough to check for repository
        # This is needed because .svn always creates .subversion and
        # some operating systems do not handle dot directory correctly.
        with TemporaryDirectory() as tempdir:
            info_cmd = ['svn', '--config-dir', tempdir, 'info', normdir]
            code, data = _run_command(info_cmd)

        # Must check for some contents, as some use empty directories
        # in testcases; either the info command worked or an entries
        # file exists.
        entries_file = os.path.join(normdir, '.svn', 'entries')
        is_svn_wd = (not code or os.path.isfile(entries_file))

        version_parts = cls.get_svn_version().split('.')

        try:
            base_svn_version = tuple(int(part) for part in version_parts[:2])
        except ValueError:
            # version output was empty or not numeric
            base_svn_version = tuple()

        if not is_svn_wd:
            #return an instance of this NO-OP class
            return SvnInfo(dirname)

        no_usable_svn = (code or not base_svn_version
                         or base_svn_version < (1, 3))
        if no_usable_svn:
            warnings.warn(("No SVN 1.3+ command found: falling back "
                           "on pre 1.7 .svn parsing"), DeprecationWarning)
            return SvnFileInfo(dirname)

        if base_svn_version < (1, 5):
            return Svn13Info(dirname)

        return Svn15Info(dirname)

    def __init__(self, path=''):
        self.path = path
        # lazily filled caches behind the entries / externals properties
        self._entries = None
        self._externals = None

    def get_revision(self):
        'Retrieve the directory revision information using svnversion'
        code, data = _run_command(['svnversion', '-c', self.path])
        if code:
            log.warn("svnversion failed")
            return 0

        match = self.revision_re.match(data)
        return int(match.group(2)) if match else 0

    @property
    def entries(self):
        '''Result of get_entries(), computed on first access.'''
        if self._entries is not None:
            return self._entries
        self._entries = self.get_entries()
        return self._entries

    @property
    def externals(self):
        '''Result of get_externals(), computed on first access.'''
        if self._externals is not None:
            return self._externals
        self._externals = self.get_externals()
        return self._externals

    def iter_externals(self):
        '''
        Iterate over the svn:external references in the repository path.
        '''
        for external in self.externals:
            yield external

    def iter_files(self):
        '''
        Iterate over the entries whose kind is "file" (case-insensitive).
        '''
        for entry_path, entry_kind in self.entries:
            if entry_kind.lower() != 'file':
                continue
            yield entry_path

    def iter_dirs(self, include_root=True):
        '''
        Iterate over the entries whose kind is "dir" (case-insensitive),
        preceded by self.path when include_root is true.
        '''
        if include_root:
            yield self.path
        for entry_path, entry_kind in self.entries:
            if entry_kind.lower() != 'dir':
                continue
            yield entry_path

    def get_entries(self):
        '''Base implementation: no entries.'''
        return []

    def get_externals(self):
        '''Base implementation: no externals.'''
        return []
| 359 |
| 360 |
class Svn13Info(SvnInfo):
    def get_entries(self):
        '''Return the entries parsed from "svn info -R --xml", or [] on failure.'''
        command = ['svn', 'info', '-R', '--xml', self.path]
        code, data = _run_command(command, encoding="utf-8")

        if not code:
            return parse_dir_entries(data)

        log.debug("svn info failed")
        return []

    def get_externals(self):
        '''
        Collect svn:externals by running propget on each directory in turn.

        Returns [] as soon as any propget call fails.
        '''
        #Previous to 1.5 --xml was not supported for svn propget and the -R
        #output format breaks the shlex compatible semantics.
        base_cmd = ['svn', 'propget', 'svn:externals']
        collected = []
        for folder in self.iter_dirs():
            code, output = _run_command(base_cmd + [folder], encoding="utf-8")
            if code != 0:
                log.warn("svn propget failed")
                return []
            #output should be a str
            for external in parse_external_prop(output):
                # an empty folder name means the external is used as-is
                joined = os.path.join(folder, external) if folder else external
                collected.append(os.path.normpath(joined))

        return collected
| 389 |
| 390 |
class Svn15Info(Svn13Info):
    def get_externals(self):
        '''Return externals parsed from "svn propget -R --xml", or [] on failure.'''
        command = ['svn', 'propget', 'svn:externals', self.path, '-R', '--xml']
        code, output = _run_command(command, encoding="utf-8")
        if not code:
            # target paths are made relative to the absolute form of self.path
            root = os.path.abspath(self.path)
            return parse_externals_xml(output, prefix=root)
        log.debug("svn propget failed")
        return []
| 399 |
| 400 |
class SvnFileInfo(SvnInfo):
    '''Info object that reads the .svn administrative files directly.'''

    def __init__(self, path=''):
        super(SvnFileInfo, self).__init__(path)
        self._directories = None
        self._revision = None

    def _walk_svn(self, base):
        '''
        Yield (path, is_file, revision) tuples for *base* and everything
        below it that is listed in .svn/entries files.  Directories carry
        their parsed revision; files carry None.
        '''
        if not os.path.isfile(joinpath(base, '.svn', 'entries')):
            return

        entries = SVNEntriesFile.load(base)
        yield (base, False, entries.parse_revision())
        for record in entries.get_undeleted_records():
            full_path = joinpath(base, decode_as_string(record))
            if os.path.isfile(full_path):
                yield (full_path, True, None)
            elif os.path.isdir(full_path):
                # descend into the sub-directory
                for nested in self._walk_svn(full_path):
                    yield nested

    def _build_entries(self):
        '''Walk the tree once, filling both self._entries and self._revision.'''
        collected = []
        highest_rev = 0

        for path, isfile, dir_rev in self._walk_svn(self.path):
            if isfile:
                collected.append((path, 'file'))
                continue
            collected.append((path, 'dir'))
            highest_rev = max(highest_rev, dir_rev)

        self._entries = collected
        self._revision = highest_rev

    def get_entries(self):
        '''Return the (path, kind) list, building it on first use.'''
        if self._entries is None:
            self._build_entries()
        return self._entries

    def get_revision(self):
        '''Return the highest directory revision found, building it on first use.'''
        if self._revision is None:
            self._build_entries()
        return self._revision

    def get_externals(self):
        '''Return the externals read from each directory's property file.'''
        candidates = [['.svn', 'dir-prop-base'],
                      ['.svn', 'dir-props']]
        externals = []

        for dirname in self.iter_dirs():
            prop_file = None
            for rel_parts in candidates:
                candidate = joinpath(dirname, *rel_parts)
                if os.path.isfile(candidate):
                    # no break: a later candidate replaces an earlier one
                    prop_file = candidate

            if prop_file is None:
                continue

            ext_prop = parse_prop_file(prop_file, 'svn:externals')
            #ext_prop should be utf-8 coming from svn:externals
            ext_prop = decode_as_string(ext_prop, encoding="utf-8")
            externals.extend(parse_external_prop(ext_prop))

        return externals
| 465 |
| 466 |
def svn_finder(dirname=''):
    '''
    Yield the file paths reported for *dirname*, then the file paths
    reported for each of its externals.
    '''
    #combined externals due to common interface
    #combined externals and entries due to lack of dir_props in 1.7
    root_info = SvnInfo.load(dirname)
    for file_path in root_info.iter_files():
        yield file_path

    for external_path in root_info.iter_externals():
        # each external is loaded as its own working copy
        external_info = SvnInfo.load(external_path)
        for file_path in external_info.iter_files():
            yield file_path
| 478 |
| 479 |
class SVNEntriesFile(object):
    '''Base wrapper around the raw text of a .svn/entries file.'''

    def __init__(self, data):
        self.data = data

    @classmethod
    def load(class_, base):
        '''Read <base>/.svn/entries and return the matching parser object.'''
        filename = os.path.join(base, '.svn', 'entries')
        with open(filename) as entries_fh:
            return SVNEntriesFile.read(entries_fh)

    @classmethod
    def read(class_, fileobj):
        '''
        Read *fileobj* and wrap its content: SVNEntriesFileXML when the
        data starts with "<?xml", SVNEntriesFileText otherwise.
        '''
        data = fileobj.read()
        if data.startswith('<?xml'):
            parser = SVNEntriesFileXML
        else:
            parser = SVNEntriesFileText
        return parser(data)

    def parse_revision(self):
        '''Return the largest parsed revision number, or 0 if there are none.'''
        return max(self.parse_revision_numbers() + [0])
| 504 |
| 505 |
class SVNEntriesFileText(SVNEntriesFile):
    '''Parser for the plain-text (form-feed separated) entries format.'''

    # svn release -> format number found on the first line of the file
    known_svn_versions = {
        '1.4.x': 8,
        '1.5.x': 9,
        '1.6.x': 10,
    }

    def __get_cached_sections(self):
        return self.sections

    def get_sections(self):
        '''
        Split the data into sections (lists of lines) and cache the result.

        Returns None when the first line is missing or is not an integer
        format number.  An empty file used to raise IndexError here
        instead of being reported as invalid.
        '''
        SECTION_DIVIDER = '\f\n'
        sections = [section.splitlines()
                    for section in self.data.split(SECTION_DIVIDER)]
        try:
            # remove the SVN version number from the first line
            svn_version = int(sections[0].pop(0))
        except (ValueError, IndexError):
            # ValueError: first line is not a number
            # IndexError: the first section has no lines (empty file)
            return None
        if svn_version not in self.known_svn_versions.values():
            log.warn("Unknown subversion verson %d", svn_version)
        self.sections = sections
        # later calls skip the parsing and return the cached list
        self.get_sections = self.__get_cached_sections
        return self.sections

    def is_valid(self):
        '''Return True when the data could be split into sections.'''
        return bool(self.get_sections())

    def get_url(self):
        '''Return line 4 of the first section (after the format number is removed).'''
        return self.get_sections()[0][4]

    def parse_revision_numbers(self):
        '''
        Return the integer found on line 9 of every section that has a
        non-empty line there.  Invalid data yields an empty list.
        '''
        revision_line_number = 9
        rev_numbers = [
            int(section[revision_line_number])
            for section in (self.get_sections() or [])
            if (len(section) > revision_line_number
                and section[revision_line_number])
        ]
        return rev_numbers

    def get_undeleted_records(self):
        '''
        Return the first line of every non-empty section whose line 5 is
        not 'delete'.  Invalid data yields an empty list.
        '''
        def undeleted(section):
            return (section and section[0]
                    and (len(section) < 6 or section[5] != 'delete'))

        return [
            section[0]
            for section in (self.get_sections() or [])
            if undeleted(section)
        ]
| 555 |
| 556 |
class SVNEntriesFileXML(SVNEntriesFile):
    '''Parser for the XML entries format, based on regular expressions.'''

    def is_valid(self):
        '''XML data is always reported as valid.'''
        return True

    def get_url(self):
        "Get repository URL"
        match = re.search('url="([^"]+)"', self.data)
        return match.group(1)

    def parse_revision_numbers(self):
        '''Return every committed-rev attribute value as an int.'''
        found = re.findall(r'committed-rev="(\d+)"', self.data)
        return [int(rev) for rev in found]

    def get_undeleted_records(self):
        '''
        Return the unescaped name attribute values that are not followed
        by deleted="true" before the next ">".
        '''
        pattern = r'name="([^"]+)"(?![^>]+deleted="true")'
        names = []
        for match in re.finditer(pattern, self.data, re.I):
            names.append(unescape(match.group(1)))
        return names
| 581 |
| 582 |
if __name__ == '__main__':
    # Print every path found for the directory given as the first argument.
    target_dir = sys.argv[1]
    for found_path in svn_finder(target_dir):
        print(found_path)
OLD | NEW |