Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Side by Side Diff: recipe_engine/third_party/setuptools/package_index.py

Issue 1344583003: Recipe package system. (Closed) Base URL: git@github.com:luci/recipes-py.git@master
Patch Set: Recompiled proto Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 """PyPI and direct package downloading"""
2 import sys
3 import os
4 import re
5 import shutil
6 import socket
7 import base64
8 import hashlib
9 from functools import wraps
10
11 from pkg_resources import (
12 CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
13 require, Environment, find_distributions, safe_name, safe_version,
14 to_filename, Requirement, DEVELOP_DIST,
15 )
16 from setuptools import ssl_support
17 from distutils import log
18 from distutils.errors import DistutilsError
19 from setuptools.compat import (urllib2, httplib, StringIO, HTTPError,
20 urlparse, urlunparse, unquote, splituser,
21 url2pathname, name2codepoint,
22 unichr, urljoin, urlsplit, urlunsplit,
23 ConfigParser)
24 from setuptools.compat import filterfalse
25 from fnmatch import translate
26 from setuptools.py26compat import strip_fragment
27 from setuptools.py27compat import get_all_headers
28
# Matches an "egg=name-version" URL fragment identifying a distribution.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
# Matches the target of an href attribute (quoted or bare).
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
# Matches PyPI's "<a>name</a> (<a ...>md5</a>)" link pairs so the digest can
# be folded into the download link as a "#md5=..." fragment (see
# PackageIndex.process_index).
PYPI_MD5 = re.compile(
    '<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)'
    'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
)
# Matches a URL scheme prefix such as "http:" (two or more characters, so
# Windows drive letters like "C:" are not mistaken for schemes).
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
# Archive extensions recognized as source distributions.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]

# Default socket timeout (seconds) applied to open_with_auth below.
_SOCKET_TIMEOUT = 15
45
def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name

    Recognizes the filename shapes produced by ``bdist_wininst``:
    ``NAME.win32.exe``, ``NAME.win32-pyX.Y.exe``, ``NAME.win-amd64.exe``
    and ``NAME.win-amd64-pyX.Y.exe``.  Anything else yields all-``None``.
    """
    lowered = name.lower()
    base = py_ver = plat = None

    if lowered.endswith('.exe'):
        if lowered.endswith('.win32.exe'):
            base, plat = name[:-10], 'win32'
        elif lowered.startswith('.win32-py', -16):
            # ".win32-pyX.Y.exe" is 16 chars; the version sits at [-7:-4]
            base, py_ver, plat = name[:-16], name[-7:-4], 'win32'
        elif lowered.endswith('.win-amd64.exe'):
            base, plat = name[:-14], 'win-amd64'
        elif lowered.startswith('.win-amd64-py', -20):
            base, py_ver, plat = name[:-20], name[-7:-4], 'win-amd64'
    return base, py_ver, plat
68
69
def egg_info_for_url(url):
    """Split `url` into its trailing filename and ``#fragment`` text.

    Returns ``(base, fragment)`` where `base` is the unquoted last path
    component (with a SourceForge "/download" suffix peeled off) and
    `fragment` is the URL fragment, '' if absent.
    """
    scheme, server, path, parameters, query, fragment = urlparse(url)
    segments = path.split('/')
    base = unquote(segments[-1])
    if server == 'sourceforge.net' and base == 'download':  # XXX Yuck
        base = unquote(segments[-2])
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
77
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    # First, whatever the filename part of the URL itself suggests.
    for dist in distros_for_location(url, base, metadata):
        yield dist
    if not fragment:
        return
    # An "#egg=name-version" fragment marks the URL as a checkout link.
    match = EGG_FRAGMENT.match(fragment)
    if match:
        for dist in interpret_distro_name(
                url, match.group(1), metadata, precedence=CHECKOUT_DIST):
            yield dist
89
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        # A zipped egg: drop the ".zip" and treat it as a plain egg.
        basename = basename[:-4]
    if basename.endswith('.egg') and '-' in basename:
        # Eggs are unambiguous: exactly one interpretation.
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform)
    # Try source distro extensions (.zip, .tgz, etc.); first match wins,
    # and EXTENSIONS lists ".tar.gz" before ".tar" on purpose.
    matching = [ext for ext in EXTENSIONS if basename.endswith(ext)]
    if matching:
        stripped = basename[:-len(matching[0])]
        return interpret_distro_name(location, stripped, metadata)
    return []  # no extension matched
110
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
116
117
def interpret_distro_name(
        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
        platform=None
        ):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Some archive names are ambiguous as to where the name ends and the
    # version begins (e.g. "adns-python-1.1.0", "egenix-mx-commercial"), so
    # we yield every possible split ("adns, python-1.1.0",
    # "adns-python, 1.1.0", "adns-python-1.1.0, no version").  Spurious
    # interpretations compare lower than any real numeric version and are
    # therefore unlikely to be selected; a long-term fix would be for PyPI
    # and distutils to use "safe" names/versions in archive filenames.
    pieces = basename.split('-')
    if not py_version and any(
            len(piece) == 5 and piece.startswith('py2.')
            for piece in pieces[2:]):
        # A "py2.X" marker means it's a bdist_dumb, not an sdist -- bail out
        return

    for cut in range(1, len(pieces) + 1):
        yield Distribution(
            location, metadata,
            '-'.join(pieces[:cut]), '-'.join(pieces[cut:]),
            py_version=py_version, precedence=precedence,
            platform=platform,
        )
152
153 # From Python 2.7 docs
154 def unique_everseen(iterable, key=None):
155 "List unique elements, preserving order. Remember all elements ever seen."
156 # unique_everseen('AAAABBBCCDAABBB') --> A B C D
157 # unique_everseen('ABBCcAD', str.lower) --> A B C D
158 seen = set()
159 seen_add = seen.add
160 if key is None:
161 for element in filterfalse(seen.__contains__, iterable):
162 seen_add(element)
163 yield element
164 else:
165 for element in iterable:
166 k = key(element)
167 if k not in seen:
168 seen_add(k)
169 yield element
170
def unique_values(func):
    """
    Wrap a function returning an iterable such that the resulting iterable
    only ever yields unique items.
    """
    @wraps(func)
    def deduplicated(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))
    return deduplicated
180
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting

@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    # Anchors whose rel attribute mentions homepage or download.
    for rel_match in REL.finditer(page):
        tag, rel = rel_match.groups()
        rel_values = set(map(str.strip, rel.lower().split(',')))
        if rel_values & {'homepage', 'download'}:
            for href_match in HREF.finditer(tag):
                yield urljoin(url, htmldecode(href_match.group(1)))

    # Older PyPI pages label these links with table headers instead.
    for heading in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(heading)
        if pos != -1:
            href_match = HREF.search(page, pos)
            if href_match:
                yield urljoin(url, htmldecode(href_match.group(1)))
201
# User-Agent header sent with every request: advertises the Python and
# setuptools versions, e.g. "Python-urllib/2.7 setuptools/7.0".
user_agent = "Python-urllib/%s setuptools/%s" % (
    sys.version[:3], require('setuptools')[0].version
)
205
class ContentChecker(object):
    """
    A null content checker that defines the interface for checking content.

    Subclasses (e.g. ``HashChecker``) override these hooks; the base class
    accepts any content as valid and reports nothing.
    """

    def feed(self, block):
        """Consume a block of downloaded data (ignored by the null checker)."""

    def is_valid(self):
        """Return True: the null checker treats all content as valid."""
        return True

    def report(self, reporter, template):
        """Do nothing; there is no hash name to substitute into `template`."""
228
class HashChecker(ContentChecker):
    """Validates downloaded content against a ``#<hash_name>=<digest>``
    URL fragment."""

    # Recognizes "<algorithm>=<hexdigest>" inside a URL fragment.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        "Construct a (possibly null) ContentChecker from a URL"
        fragment = urlparse(url)[-1]
        if fragment:
            match = cls.pattern.search(fragment)
            if match:
                return cls(**match.groupdict())
        # No usable hash fragment: fall back to the accept-anything checker.
        return ContentChecker()

    def feed(self, block):
        """Fold `block` into the running digest."""
        self.hash.update(block)

    def is_valid(self):
        """True when the accumulated digest matches the expected hex digest."""
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        """Call `reporter` with the hash name substituted into `template`."""
        return reporter(template % self.hash_name)
260
261
class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs.

    Extends ``pkg_resources.Environment`` with the ability to discover
    distributions by crawling an index (PyPI's "simple" pages), following
    links, and downloading via HTTP(S), file, svn, git, or hg URLs.
    """

    def __init__(
            self, index_url="https://pypi.python.org/simple", hosts=('*',),
            ca_bundle=None, verify_ssl=True, *args, **kw
            ):
        # `hosts` is a sequence of glob patterns naming hosts we may
        # download from; the default '*' allows any host.
        Environment.__init__(self,*args,**kw)
        # Normalize so the index URL always ends with exactly one slash.
        self.index_url = index_url + "/"[:not index_url.endswith('/')]
        self.scanned_urls = {}     # URLs already examined
        self.fetched_urls = {}     # URLs whose pages were actually retrieved
        self.package_pages = {}    # project key -> {package page URL: True}
        # Compile the host allow-list globs into a single regex matcher.
        self.allows = re.compile('|'.join(map(translate,hosts))).match
        self.to_scan = []          # find-links URLs deferred until prescan()
        # Use the SSL-verifying opener only when verification is requested
        # and a CA bundle is available.
        if verify_ssl and ssl_support.is_available and (ca_bundle or ssl_support.find_ca_bundle()):
            self.opener = ssl_support.opener_for(ca_bundle)
        else: self.opener = urllib2.urlopen

    def process_url(self, url, retrieve=False):
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            # No scheme: treat it as a local filename/directory.
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
        if f is None: return
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):  # We are in Python 3 and got bytes. We want str.
            if isinstance(f, HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # Recursively scan every link found on the page.
        for match in HREF.finditer(page):
            link = urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        # Only index-hosted pages that actually exist get index processing.
        if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
            page = self.process_index(url, page)

    def process_filename(self, fn, nested=False):
        # Process a local filename or (non-nested) directory of filenames.
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            # One level deep only: each entry is processed with nested=True.
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path,item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            list(map(self.add, dists))

    def url_ok(self, url, fatal=False):
        """Return True if `url`'s host is allowed; otherwise warn or raise."""
        s = URL_SCHEME(url)
        # file: URLs are always allowed; others must match the host globs.
        if (s and s.group(1).lower()=='file') or self.allows(urlparse(url)[1]):
            return True
        msg = ("\nNote: Bypassing %s (disallowed host; see "
            "http://bit.ly/1dg9ijs for details).\n")
        if fatal:
            raise DistutilsError(msg % url)
        else:
            self.warn(msg, url)

    def scan_egg_links(self, search_path):
        """Scan every directory on `search_path` for ``*.egg-link`` files."""
        for item in search_path:
            if os.path.isdir(item):
                for entry in os.listdir(item):
                    if entry.endswith('.egg-link'):
                        self.scan_egg_link(item, entry)

    def scan_egg_link(self, path, entry):
        """Add distributions referenced by one ``.egg-link`` file."""
        # An egg-link is expected to contain exactly two non-blank lines:
        # the project location and a relative subpath.
        lines = [_f for _f in map(str.strip,
                                  open(os.path.join(path, entry))) if _f]
        if len(lines)==2:
            for dist in find_distributions(os.path.join(path, lines[0])):
                dist.location = os.path.join(path, *lines)
                dist.precedence = SOURCE_DIST
                self.add(dist)

    def process_index(self,url,page):
        """Process the contents of a PyPI page"""
        def scan(link):
            # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
                parts = list(map(
                    unquote, link[len(self.index_url):].split('/')
                ))
                if len(parts)==2 and '#' not in parts[1]:
                    # it's a package page, sanitize and index it
                    pkg = safe_name(parts[0])
                    ver = safe_version(parts[1])
                    self.package_pages.setdefault(pkg.lower(),{})[link] = True
                    return to_filename(pkg), to_filename(ver)
            return None, None

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                scan(urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = scan(url)   # ensure this page is in the page index
        if pkg:
            # process individual package page
            for new_url in find_external_links(url, page):
                # Process the found URL
                base, frag = egg_info_for_url(new_url)
                if base.endswith('.py') and not frag:
                    # A bare .py link needs an #egg= fragment to be usable.
                    if ver:
                        new_url+='#egg=%s-%s' % (pkg,ver)
                    else:
                        self.need_version_info(url)
                self.scan_url(new_url)

            # Rewrite PyPI's md5 links into "#md5=" fragments on the
            # download URLs so HashChecker can validate downloads.
            return PYPI_MD5.sub(
                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1,3,2), page
            )
        else:
            return ""   # no sense double-scanning non-package pages

    def need_version_info(self, url):
        """Trigger a full index scan because a page lacked version info."""
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.", url
        )

    def scan_all(self, msg=None, *args):
        """Scan the entire index (once), optionally warning with `msg` first."""
        if self.index_url not in self.fetched_urls:
            if msg: self.warn(msg,*args)
            self.info(
                "Scanning index of all packages (this may take a while)"
            )
            self.scan_url(self.index_url)

    def find_packages(self, requirement):
        """Scan index pages that might satisfy `requirement`."""
        self.scan_url(self.index_url + requirement.unsafe_name+'/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name+'/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.not_found_in_index(requirement)

        for url in list(self.package_pages.get(requirement.key,())):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        """Environment hook: locate a distribution matching `requirement`."""
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement,installer)

    def check_hash(self, checker, filename, tfp):
        """
        checker is a ContentChecker

        Close and delete `filename` and raise DistutilsError if the
        checker's digest does not validate.
        """
        checker.report(self.debug,
            "Validating %%s checksum for %s" % filename)
        if not checker.is_valid():
            tfp.close()
            os.unlink(filename)
            raise DistutilsError(
                "%s validation failed for %s; "
                "possible download problem?" % (
                    checker.hash.name, os.path.basename(filename))
            )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None  # if we have already "gone online"
                or not URL_SCHEME(url)  # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))  # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            list(map(self.scan_url, self.to_scan))
        self.to_scan = None  # from now on, go ahead and process immediately

    def not_found_in_index(self, requirement):
        """React to a missing package page: log appropriately and scan all."""
        if self[requirement.key]:  # we've seen at least one distro
            meth, msg = self.info, "Couldn't retrieve index page for %r"
        else:  # no distros seen for this name, might be misspelled
            meth, msg = (self.warn,
                "Couldn't find index page for %r (maybe misspelled?)")
        meth(msg, requirement.unsafe_name)
        self.scan_all()

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object). If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged. If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned. Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec,Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found,fragment,tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                try:
                    spec = Requirement.parse(spec)
                except ValueError:
                    raise DistutilsError(
                        "Not a URL, existing file, or requirement spec: %r" %
                        (spec,)
                    )
        return getattr(self.fetch_distribution(spec, tmpdir),'location',None)

    def fetch_distribution(
            self, requirement, tmpdir, force_scan=False, source=False,
            develop_ok=False, local_index=None
            ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence==DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn("Skipping development or system egg: %s",dist)
                        skipped[dist] = 1
                    continue

                if dist in req and (dist.precedence<=SOURCE_DIST or not source):
                    return dist

        # Try progressively more expensive lookups until a match is found:
        # forced full scan, local index, deferred find-links, then a
        # targeted package search.
        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if local_index is not None:
            dist = dist or find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=self.download(dist.location, tmpdir))

    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead.  For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement,tmpdir,force_scan,source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        """Create a trivial setup.py in `tmpdir` for a bare ``.py`` download
        whose ``#egg=name-version`` fragment is unambiguous; return the
        (possibly relocated) filename."""
        match = EGG_FRAGMENT.match(fragment)
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists)==1:   # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                from setuptools.command.easy_install import samefile
                if not samefile(filename, dst):
                    shutil.copy2(filename, dst)
                    filename=dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
                file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name, dists[0].version,
                        os.path.splitext(basename)[0]
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment,dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    # Read/write chunk size (bytes) used by _download_to.
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        """Stream `url` into `filename`, validating any hash fragment and
        invoking reporthook() per block; return the response headers."""
        self.info("Downloading %s", url)
        # Download the file
        fp, info = None, None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(strip_fragment(url))
            if isinstance(fp, HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code,fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = get_all_headers(headers, 'Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename,'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                self.check_hash(checker, filename, tfp)
            return headers
        finally:
            if fp: fp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        """Progress callback hook; subclasses may override."""
        pass    # no-op

    def open_url(self, url, warning=None):
        """Open `url`, returning a file-like object; on recoverable errors
        either warn (when `warning` is given) or raise DistutilsError."""
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, httplib.InvalidURL):
            v = sys.exc_info()[1]
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg))
        except urllib2.HTTPError:
            # HTTP errors are returned (not raised) so callers can inspect
            # the response object.
            v = sys.exc_info()[1]
            return v
        except urllib2.URLError:
            v = sys.exc_info()[1]
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v.reason))
        except httplib.BadStatusLine:
            v = sys.exc_info()[1]
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' %
                    (url, v.line)
                )
        except httplib.HTTPException:
            v = sys.exc_info()[1]
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v))

    def _download_url(self, scheme, url, tmpdir):
        """Dispatch a download by URL scheme, returning the local path."""
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            while '..' in name:
                # Defuse path traversal in server-supplied names.
                name = name.replace('..','.').replace('\\','_')
        else:
            name = "__downloaded__"    # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]    # strip the extra .zip before download

        filename = os.path.join(tmpdir,name)

        # Download the file
        #
        if scheme=='svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme=='git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme=='file':
            return url2pathname(urlparse(url)[2])
        else:
            self.url_ok(url, True)   # raises error if not allowed
            return self._attempt_download(url, filename)

    def scan_url(self, url):
        """Process `url`, retrieving its page even if previously scanned."""
        self.process_url(url, True)

    def _attempt_download(self, url, filename):
        """Download `url`; if the result is HTML, treat it specially."""
        headers = self._download_to(url, filename)
        if 'html' in headers.get('content-type','').lower():
            return self._download_html(url, headers, filename)
        else:
            return filename

    def _download_html(self, url, headers, filename):
        """Handle an HTML download: accept only Subversion index pages
        (redirecting to an svn checkout); anything else is an error."""
        file = open(filename)
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break   # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at "+url)

    def _download_svn(self, url, filename):
        """Check out `url` with the svn command-line client."""
        url = url.split('#',1)[0]   # remove any fragment for svn's sake
        creds = ''
        if url.lower().startswith('svn:') and '@' in url:
            # Extract user:password credentials embedded in the URL and pass
            # them to svn via command-line options instead.
            scheme, netloc, path, p, q, f = urlparse(url)
            if not netloc and path.startswith('//') and '/' in path[2:]:
                netloc, path = path[2:].split('/',1)
                auth, host = splituser(netloc)
                if auth:
                    if ':' in auth:
                        user, pw = auth.split(':',1)
                        creds = " --username=%s --password=%s" % (user, pw)
                    else:
                        creds = " --username="+auth
                    netloc = host
                    # NOTE(review): the third urlunparse element is `url`,
                    # not `path` -- looks like a bug that embeds the whole
                    # URL as the path component; confirm against upstream.
                    url = urlunparse((scheme, netloc, url, p, q, f))
        # NOTE(review): url/filename are interpolated into a shell command;
        # a malicious link could inject shell syntax here.
        self.info("Doing subversion checkout from %s to %s", url, filename)
        os.system("svn checkout%s -q %s %s" % (creds, url, filename))
        return filename

    @staticmethod
    def _vcs_split_rev_from_url(url, pop_prefix=False):
        """Split a VCS URL into (clean_url, revision-or-None).

        Strips a "vcs+" scheme prefix, any "#fragment", and a trailing
        "@rev" path suffix.  `pop_prefix` is accepted for call compatibility
        but not used here.
        """
        scheme, netloc, path, query, frag = urlsplit(url)

        scheme = scheme.split('+', 1)[-1]

        # Some fragment identification fails
        path = path.split('#',1)[0]

        rev = None
        if '@' in path:
            path, rev = path.rsplit('@', 1)

        # Also, discard fragment
        url = urlunsplit((scheme, netloc, path, query, ''))

        return url, rev

    def _download_git(self, url, filename):
        """Clone `url` with git and optionally check out a revision."""
        filename = filename.split('#',1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        # NOTE(review): shell interpolation of url/filename, as in
        # _download_svn.
        os.system("git clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Checking out %s", rev)
            os.system("(cd %s && git checkout --quiet %s)" % (
                filename,
                rev,
            ))

        return filename

    def _download_hg(self, url, filename):
        """Clone `url` with mercurial and optionally update to a revision."""
        filename = filename.split('#',1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        os.system("hg clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Updating to %s", rev)
            os.system("(cd %s && hg up -C -r %s >&-)" % (
                filename,
                rev,
            ))

        return filename

    def debug(self, msg, *args):
        """Log at debug level; subclasses may override for custom logging."""
        log.debug(msg, *args)

    def info(self, msg, *args):
        """Log at info level; subclasses may override for custom logging."""
        log.info(msg, *args)

    def warn(self, msg, *args):
        """Log at warning level; subclasses may override for custom logging."""
        log.warn(msg, *args)
869
# This pattern matches a character entity reference (a decimal numeric
# references, a hexadecimal numeric reference, or a named reference).
# Note the trailing ';' is optional, so sloppy HTML is still decoded.
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
873
def uchr(c):
    """Return the character for codepoint `c`; pass non-ints through as-is."""
    if not isinstance(c, int):
        return c
    # unichr handles wide codepoints on Python 2; chr covers the rest.
    return unichr(c) if c > 255 else chr(c)
879
def decode_entity(match):
    """Translate one entity-reference regex match into its character."""
    what = match.group(1)
    if what.startswith('#x'):
        code = int(what[2:], 16)          # hexadecimal numeric reference
    elif what.startswith('#'):
        code = int(what[1:])              # decimal numeric reference
    else:
        # Named reference; unknown names fall back to the literal text.
        code = name2codepoint.get(what, match.group(0))
    return uchr(code)
889
def htmldecode(text):
    """Decode HTML entities in the given text."""
    # Delegates to the precompiled entity_sub pattern with decode_entity
    # as the replacement callback.
    return entity_sub(decode_entity, text)
893
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable with the global default
    socket timeout set to `timeout` seconds, restoring the previous default
    afterwards (even on error)."""
    def decorate(func):
        def with_timeout(*args, **kwargs):
            previous = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(previous)
        return with_timeout
    return decorate
905
def _encode_auth(auth):
    """
    Encode auth from a URL suitable for an HTTP Basic-Authorization header.
    >>> str(_encode_auth('username%3Apassword'))
    'dXNlcm5hbWU6cGFzc3dvcmQ='

    Long auth strings should not cause a newline to be inserted.
    >>> long_auth = 'username:' + 'password'*10
    >>> chr(10) in str(_encode_auth(long_auth))
    False
    """
    auth_s = unquote(auth)
    # convert to bytes
    auth_bytes = auth_s.encode()
    # base64.b64encode never inserts newlines, unlike the legacy
    # encodestring() (deprecated since 3.1 and removed in Python 3.9).
    encoded_bytes = base64.b64encode(auth_bytes)
    # convert back to a string
    encoded = encoded_bytes.decode()
    # strip any stray whitespace, belt and braces
    return encoded.replace('\n','')
927
class Credential(object):
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        # Iterates username first, then password, so unpacking works:
        # user, pw = cred
        return iter((self.username, self.password))

    def __str__(self):
        return '%s:%s' % (self.username, self.password)
942
class PyPIConfig(ConfigParser.ConfigParser):
    """Access to the repository credentials stored in ``~/.pypirc``."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        empty_defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        ConfigParser.ConfigParser.__init__(self, empty_defaults)

        rc_path = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc_path):
            self.read(rc_path)

    @property
    def creds_by_repository(self):
        """Map each configured repository URL to its Credential."""
        sections_with_repositories = (
            section for section in self.sections()
            if self.get(section, 'repository').strip()
        )
        return dict(
            self._get_repo_cred(section)
            for section in sections_with_repositories
        )

    def _get_repo_cred(self, section):
        """Return a (repository, Credential) pair for one config section."""
        repo = self.get(section, 'repository').strip()
        cred = Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )
        return repo, cred

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
980
981
def open_with_auth(url, opener=urllib2.urlopen):
    """Open a urllib2 request, handling HTTP authentication.

    Credentials come either from "user:pass@" embedded in the URL's netloc
    or, failing that, from a matching repository entry in ~/.pypirc; they
    are sent as a Basic Authorization header.
    """

    scheme, netloc, path, params, query, frag = urlparse(url)

    # Double scheme does not raise on Mac OS X as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise httplib.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, host = splituser(netloc)
    else:
        auth = None
        # NOTE(review): `host` stays unbound on this branch; if a .pypirc
        # credential matches a non-http(s) URL below, the urlunparse call
        # would raise NameError -- confirm whether such URLs can get here.

    if not auth:
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)' % info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        # Strip the credentials out of the URL actually requested.
        new_url = urlunparse((scheme,host,path,params,query,frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse(fp.url)
        if s2==scheme and h2==host:
            fp.url = urlunparse((s2,netloc,path2,param2,query2,frag2))

    return fp

# adding a timeout to avoid freezing package_index
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1026
1027
def fix_sf_url(url):
    """Return `url` unchanged; retained only for backward compatibility."""
    return url
1030
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urlparse(url)
    filename = url2pathname(path)
    if os.path.isfile(filename):
        # A plain file: let urllib serve it directly.
        return urllib2.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        entries = []
        body = None
        for entry in os.listdir(filename):
            if entry == 'index.html':
                # An explicit index page wins over a generated listing.
                with open(os.path.join(filename, entry), 'r') as fp:
                    body = fp.read()
                break
            if os.path.isdir(os.path.join(filename, entry)):
                entry += '/'
            entries.append("<a href=%r>%s</a>" % (entry, entry))
        if body is None:
            # No index.html: synthesize a minimal directory listing.
            body = ("<html><head><title>%s</title>" % url) + \
                "</head><body>%s</body></html>" % '\n'.join(entries)
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    headers = {'content-type': 'text/html'}
    return HTTPError(url, status, message, headers, StringIO(body))
OLDNEW
« no previous file with comments | « recipe_engine/third_party/setuptools/msvc9_support.py ('k') | recipe_engine/third_party/setuptools/py26compat.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698