Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add basic setup.py extraction #4

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion metadata_please/source_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
- PEP 621 metadata (pyproject.toml)
- Poetry metadata (pyproject.toml)
- Setuptools static metadata (setup.cfg)
- Setuptools, low effort reading (setup.py)

Notably, does not read setup.py or attempt to emulate anything that can't be read staticly.
Notably, does not read nontrivial setup.py or attempt to emulate anything that can't be read statically.
"""
import ast
import re
from pathlib import Path

Expand All @@ -22,6 +24,8 @@

from packaging.utils import canonicalize_name

from .source_checkout_ast import SetupFindingVisitor, UNKNOWN

from .types import BasicMetadata

OPERATOR_RE = re.compile(r"([<>=~]+)(\d.*)")
Expand Down Expand Up @@ -54,6 +58,7 @@ def from_source_checkout(path: Path) -> bytes:
from_pep621_checkout(path)
or from_poetry_checkout(path)
or from_setup_cfg_checkout(path)
or from_setup_py_checkout(path)
)


Expand Down Expand Up @@ -227,6 +232,39 @@ def from_setup_cfg_checkout(path: Path) -> bytes:
return "".join(buf).encode("utf-8")


def _split_requirement_text(text: str) -> list:
    """Split a newline-separated requirements string into individual entries.

    Blank lines and lines starting with ``#`` are dropped; surrounding
    whitespace is stripped from every remaining line.
    """
    stripped = (line.strip() for line in text.splitlines())
    return [line for line in stripped if line and not line.startswith("#")]


def from_setup_py_checkout(path: Path) -> bytes:
    """Extract dependency metadata from a simple, statically-readable setup.py.

    Args:
        path: Directory of the source checkout that may contain ``setup.py``.

    Returns:
        UTF-8 encoded ``Requires-Dist`` / ``Provides-Extra`` lines.  Empty
        bytes when there is no ``setup.py``, when no ``setup()`` call was
        found, or when the call had no readable keyword arguments.

    Raises:
        ValueError: If ``install_requires`` or ``extras_require`` is present
            but is not a plain literal.
    """
    try:
        data = (path / "setup.py").read_bytes()
    except FileNotFoundError:
        return b""

    v = SetupFindingVisitor()
    v.visit(ast.parse(data))

    # NOTE(review): v.setup_call_kwargs (set when the call uses ``**kwargs``)
    # is not consulted here, so requirements passed that way are not reported.
    if not v.setup_call_args:
        return b""

    buf = []

    r = v.setup_call_args.get("install_requires")
    if r:
        if r is UNKNOWN:
            raise ValueError("Complex setup call can't extract reqs")
        if isinstance(r, str):
            # setuptools also accepts one newline-separated string; iterating
            # it directly would yield one "requirement" per character.
            r = _split_requirement_text(r)
        for dep in r:
            buf.append(f"Requires-Dist: {dep}\n")
    er = v.setup_call_args.get("extras_require")
    if er:
        if er is UNKNOWN:
            raise ValueError("Complex setup call can't extract extras")
        for k, deps in er.items():
            extra_name = canonicalize_name(k)
            buf.append(f"Provides-Extra: {extra_name}\n")
            if isinstance(deps, str):
                # Same string-or-list leniency applies to each extra's value.
                deps = _split_requirement_text(deps)
            for i in deps:
                buf.append("Requires-Dist: " + merge_extra_marker(extra_name, i) + "\n")

    return "".join(buf).encode("utf-8")


def basic_metadata_from_source_checkout(path: Path) -> BasicMetadata:
    """Build a BasicMetadata from the metadata bytes read out of the checkout at *path*."""
    raw_metadata = from_source_checkout(path)
    return BasicMetadata.from_metadata(raw_metadata)

Expand Down
123 changes: 123 additions & 0 deletions metadata_please/source_checkout_ast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""
Reads static values from setup.py when they are simple enough.

With the goal of just getting dependencies, and returning a clear error if we don't understand, this has a simpler design.

This only reads ~50% of current setup.py, vs dowsing which is more like 80%.

I experimented with a more complex version of this in
[dowsing](https://github.com/python-packaging/dowsing/) with a goal of 100%
coverage of open source
"""

import ast
from typing import Any, Dict, Optional


# Copied from orig-index
class ShortCircuitingVisitor(ast.NodeVisitor):
    """
    A NodeVisitor whose ``visit_*`` methods decide, via their return value,
    whether the node's children are traversed (in the spirit of
    libcst.CSTVisitor).  A truthy return descends into the children; a falsy
    return (including the implicit ``None``) prunes the subtree.  The
    ``visit_*`` methods never have to walk children themselves.
    """

    def visit(self, node: ast.AST) -> None:
        """Dispatch *node* to its handler and descend only if the handler says so."""
        handler = getattr(self, f"visit_{node.__class__.__name__}", self.generic_visit)
        if handler(node):
            self.visit_children(node)

    def visit_children(self, node: ast.AST) -> None:
        """Visit every AST node held directly in, or in a list in, a field of *node*."""
        for _field_name, field_value in ast.iter_fields(node):
            # Treat a single child like a one-element list so both shapes
            # share the same loop; non-AST values are ignored either way.
            children = field_value if isinstance(field_value, list) else [field_value]
            for child in children:
                if isinstance(child, ast.AST):
                    self.visit(child)

    def generic_visit(self, node: ast.AST) -> bool:
        """Fallback handler for node types without a ``visit_*`` method: always descend."""
        return True


class QualifiedNameSaver(ShortCircuitingVisitor):
    """Similar to LibCST's QualifiedNameProvider except simpler and wronger.

    Remembers which names were bound by import statements so that a later
    name or attribute expression can be mapped back to a dotted name.

    Note: the ``visit_*`` methods here return None, so children of the nodes
    they handle are not visited.  Currently the short-circuiting feature is
    not used for anything but skipping ``func(setup(...))`` by accident.
    """

    def __init__(self) -> None:
        super().__init__()
        # Locally bound name -> dotted name it refers to.
        self.qualified_name_prefixes: Dict[str, str] = {}

    def qualified_name(self, node: ast.AST) -> str:
        """Return the dotted name *node* refers to.

        Names that were not bound by a recorded import are reported as
        ``<locals>.NAME``.

        Raises:
            ValueError: If *node* is anything other than a name, an attribute
                chain rooted in a name, or an expression statement wrapping
                one of those.
        """
        if isinstance(node, ast.Attribute):
            return self.qualified_name(node.value) + "." + node.attr
        elif isinstance(node, ast.Expr):
            return self.qualified_name(node.value)
        elif isinstance(node, ast.Name):
            new = self.qualified_name_prefixes.get(node.id)
            if new:
                return new
            return f"<locals>.{node.id}"
        else:
            raise ValueError(f"Complex expression: {type(node)}")

    def visit_Import(self, node: ast.Import) -> None:
        """Record the names bound by ``import x`` / ``import x.y as z``."""
        # .names
        # alias = (identifier name, identifier? asname)
        for a in node.names:
            if a.asname:
                self.qualified_name_prefixes[a.asname] = a.name
            else:
                # A plain `import a.b.c` binds only the top-level name `a`;
                # keying on the full dotted string could never match a Name.
                top_level = a.name.partition(".")[0]
                self.qualified_name_prefixes[top_level] = top_level

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Record the names bound by ``from x import y`` (absolute or relative)."""
        # .identifier / .level
        # .names
        # alias = (identifier name, identifier? asname)
        # Keep the leading dots even when a module is given, so that
        # `from .setuptools import setup` is not confused with the real one.
        prefix = "." * node.level
        if node.module:
            prefix += f"{node.module}."

        for a in node.names:
            self.qualified_name_prefixes[a.asname or a.name] = prefix + a.name


class Unknown:
    """Type of the UNKNOWN sentinel, used for values that could not be read statically."""

    def __repr__(self) -> str:
        # Readable in debug output instead of an opaque object address.
        return "UNKNOWN"


# Singleton, compared by identity (``value is UNKNOWN``).  It is left truthy
# on purpose: callers test ``if value:`` before checking for the sentinel.
UNKNOWN = Unknown()


class SetupFindingVisitor(QualifiedNameSaver):
    """Find a ``setup(...)`` call and record its keyword arguments.

    After ``visit()`` has run:

    - ``setup_call_args`` is None if no setup call was seen; otherwise a dict
      mapping each keyword name to its literal value, or to ``UNKNOWN`` when
      the value is not a plain literal.
    - ``setup_call_kwargs`` is None if no setup call was seen; otherwise
      whether the call used ``**kwargs`` expansion.

    If several setup calls are visited, the last one wins.
    """

    def __init__(self) -> None:
        super().__init__()
        self.setup_call_args: Optional[Dict[str, Any]] = None
        self.setup_call_kwargs: Optional[bool] = None

    def visit_Call(self, node: ast.Call) -> None:
        """Inspect one call; returns None, so the call's children are not visited.

        Raises:
            ValueError: Propagated from ``qualified_name`` when the callee is
                not a plain name or attribute chain.
        """
        # .func (expr, can just be name)
        # .args
        # .keywords
        qn = self.qualified_name(node.func)
        # "distutils.core.setup" is where distutils actually exposes setup();
        # "distutils.setup" is kept so nothing previously matched is lost.
        if qn in ("setuptools.setup", "distutils.core.setup", "distutils.setup"):
            self.setup_call_args = d = {}
            self.setup_call_kwargs = False
            # Positional args are rarely used
            for k in node.keywords:
                if not k.arg:
                    # A keyword without a name is a ``**mapping`` expansion.
                    self.setup_call_kwargs = True
                else:
                    try:
                        d[k.arg] = ast.literal_eval(k.value)
                    except (ValueError, TypeError):
                        # ValueError: malformed node or string.
                        # TypeError: e.g. an unhashable dict key or set item.
                        d[k.arg] = UNKNOWN


if __name__ == "__main__":
    import sys
    from pathlib import Path

    # Debug entry point: parse the file named by the first command-line
    # argument and print whatever setup() keyword arguments were found.
    finder = SetupFindingVisitor()
    finder.visit(ast.parse(Path(sys.argv[1]).read_bytes()))
    print(finder.setup_call_args)
34 changes: 34 additions & 0 deletions metadata_please/tests/source_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,40 @@ def test_setuptools_empty(self) -> None:
basic_metadata_from_source_checkout(Path(d)),
)

def test_setuppy_empty(self) -> None:
    """An empty setup.py yields no requirements and no extras."""
    with tempfile.TemporaryDirectory() as tmp:
        checkout = Path(tmp)
        Path(checkout, "setup.py").write_text("")
        expected = BasicMetadata((), frozenset())
        self.assertEqual(expected, basic_metadata_from_source_checkout(checkout))

def test_setuppy_trivial(self) -> None:
    """A setup() call without arguments yields no requirements and no extras."""
    with tempfile.TemporaryDirectory() as tmp:
        checkout = Path(tmp)
        Path(checkout, "setup.py").write_text("from setuptools import setup; setup()")
        expected = BasicMetadata((), frozenset())
        self.assertEqual(expected, basic_metadata_from_source_checkout(checkout))

def test_setuppy(self) -> None:
    """Literal install_requires and extras_require are both extracted."""
    setup_py_text = (
        "import setuptools; setuptools.setup(install_requires=['a'], extras_require={'b': ['c']})"
    )
    with tempfile.TemporaryDirectory() as tmp:
        checkout = Path(tmp)
        Path(checkout, "setup.py").write_text(setup_py_text)
        expected = BasicMetadata(["a", 'c ; extra == "b"'], frozenset("b"))
        self.assertEqual(expected, basic_metadata_from_source_checkout(checkout))

def test_setuppy_toocomplex(self) -> None:
    """A non-literal install_requires is reported as an error rather than guessed at."""
    with tempfile.TemporaryDirectory() as d:
        Path(d, "setup.py").write_text(
            "from setuptools import setup; setup(install_requires=blarg)"
        )
        # Match on the message too, so an unrelated ValueError cannot
        # satisfy this test.
        with self.assertRaisesRegex(
            ValueError, "Complex setup call can't extract reqs"
        ):
            basic_metadata_from_source_checkout(Path(d))

def test_setuptools_extras(self) -> None:
with tempfile.TemporaryDirectory() as d:
Path(d, "setup.cfg").write_text(
Expand Down
Loading