Initial commit: Add multilspy files from github.com/microsoft/monitors4codegen repository
microsoft · Aug 8, 2024 · 2c0ce92 · 2c0ce92
1 parent 70a3a97
commit 2c0ce92
Show file tree

Hide file tree

Showing 42 changed files with 15,055 additions and 15 deletions.
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -0,0 +1,82 @@
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+#
+# You may wish to alter this file to override the set of languages analyzed,
+# or to provide custom queries or build logic.
+#
+# ******** NOTE ********
+# We have attempted to detect the languages in your repository. Please check
+# the `language` matrix defined below to confirm you have the correct set of
+# supported CodeQL languages.
+#
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: [ "main" ]
+  schedule:
+    - cron: '22 13 * * 2'  # minute 22, hour 13, day-of-week 2 (Tuesday); GitHub evaluates schedules in UTC
+
+jobs:  # one job: run CodeQL analysis once per language listed in the matrix
+  analyze:
+    name: Analyze
+    # Runner size impacts CodeQL analysis time. To learn more, please see:
+    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
+    #   - https://gh.io/supported-runners-and-hardware-resources
+    #   - https://gh.io/using-larger-runners
+    # Consider using larger runners for possible analysis time improvements.
+    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
+    permissions:
+      actions: read
+      contents: read
+      security-events: write  # needed so the analyze step can upload code-scanning results
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'python' ]
+        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ]
+        # Use only 'java' to analyze code written in Java, Kotlin or both
+        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
+        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # Initializes the CodeQL tools for scanning.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v3
+      with:
+        languages: ${{ matrix.language }}
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+
+        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+        # queries: security-extended,security-and-quality
+
+
+    # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
+    # If this step fails, then you should remove it and run the build manually (see below)
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v3
+
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+
+    #   If the Autobuild fails above, remove it and uncomment the following three lines.
+    #   Modify them (or add more) to build your code; please refer to the EXAMPLE below for guidance.
+
+    # - run: |
+    #     echo "Run, Build Application using script"
+    #     ./location_of_script_within_repo/buildscript.sh
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v3
+      with:
+        category: "/language:${{matrix.language}}"
diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,78 @@
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: >-
+  Monitor-Guided Decoding of Code LMs with Static Analysis
+  of Repository Context
+message: >-
+  If you use this repository, please cite it using the metadata
+  from this file.
+type: dataset  # NOTE(review): CFF 1.2.0 accepts `software` or `dataset`; confirm `dataset` is intended for this repository
+authors:
+  - given-names: Lakshya A
+    family-names: Agrawal
+    email: t-lakagrawal@microsoft.com
+    affiliation: Microsoft Research
+    orcid: 'https://orcid.org/0000-0003-0409-8212'
+  - given-names: Aditya
+    family-names: Kanade
+    email: kanadeaditya@microsoft.com
+    affiliation: Microsoft Research
+  - given-names: Navin
+    family-names: Goyal
+    email: navingo@microsoft.com
+    affiliation: Microsoft Research
+  - given-names: Shuvendu K.
+    family-names: Lahiri
+    email: shuvendu.lahiri@microsoft.com
+    affiliation: Microsoft Research
+  - given-names: Sriram K.
+    family-names: Rajamani
+    email: sriram@microsoft.com
+    affiliation: Microsoft Research
+identifiers:
+  - type: doi
+    value: 10.48550/arXiv.2306.10763
+  - type: url
+    value: >-
+      https://openreview.net/forum?id=qPUbKxKvXq&noteId=98Ukj82fSP
+abstract: >-
+  Language models of code (LMs) work well when the
+  surrounding code provides sufficient context. This is not
+  true when it becomes necessary to use types, functionality
+  or APIs defined elsewhere in the repository or a linked
+  library, especially those not seen during training. LMs
+  suffer from limited awareness of such global context and
+  end up hallucinating.
+
+
+  Integrated development environments (IDEs) assist
+  developers in understanding repository context using
+  static analysis. We extend this assistance, enjoyed by
+  developers, to LMs. We propose monitor-guided decoding
+  (MGD) where a monitor uses static analysis to guide the
+  decoding. We construct a repository-level dataset
+  PragmaticCode for method-completion in Java and evaluate
+  MGD on it. On models of varying parameter scale, by
+  monitoring for type-consistent object dereferences, MGD
+  consistently improves compilation rates and agreement with
+  ground truth. Further, LMs with fewer parameters, when
+  augmented with MGD, can outperform larger LMs. With MGD,
+  SantaCoder-1.1B achieves better compilation rate and
+  next-identifier match than the much larger
+  text-davinci-003 model.
+
+
+  We also conduct a generalizability study to evaluate the
+  ability of MGD to generalize to multiple programming
+  languages (Java, C# and Rust), coding scenarios (e.g.,
+  correct number of arguments to method calls), and to
+  enforce richer semantic constraints (e.g., stateful API
+  protocols). Our data and implementation are available at
+  https://github.com/microsoft/monitors4codegen.
+keywords:
+  - program analysis
+  - correctness
+  - code generation
+  - Language models
diff --git a/SUPPORT.md b/SUPPORT.md
@@ -1,13 +1,3 @@
-# TODO: The maintainer of this repo has not yet edited this file
-
-**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
-
-- **No CSS support:** Fill out this template with information about how to file issues and get help.
-- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
-- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.
-
-*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
-
 # Support
 
 ## How to file issues and get help  
@@ -16,10 +6,7 @@ This project uses GitHub Issues to track bugs and feature requests. Please searc
 issues before filing new issues to avoid duplicates.  For new issues, file your bug or 
 feature request as a new Issue.
 
-For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 
-FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
-CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
+For help and questions about using this project, please create an issue with the label "question".
 
 ## Microsoft Support Policy  
-
-Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
+Support for `multilspy` is limited to the resources listed above.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -0,0 +1,59 @@
+# Starter pipeline
+# Start with a minimal pipeline that you can customize to build and deploy your code.
+# Add steps that build, run tests, deploy, and more:
+# https://aka.ms/yaml
+
+trigger:
+- main
+
+pool:
+  vmImage: ubuntu-latest
+
+steps:
+- script: echo Hello, world!
+  displayName: 'Run a one-line script'  # starter-template placeholder: the script only echoes a greeting
+
+- script: |
+    echo Add other tasks to build, test, and deploy your project.
+    echo See https://aka.ms/yaml
+  displayName: 'Run a multi-line script'  # starter-template placeholder: the script only echoes guidance text
+
+- task: ComponentGovernanceComponentDetection@0
+  inputs:
+    scanType: 'Register'
+    verbosity: 'Verbose'
+    alertWarningLevel: 'High'
+
+- task: CodeQL3000Init@0  # NOTE(review): no build or test step runs between Init and Finalize - confirm that is intended
+- task: CodeQL3000Finalize@0
+
+# - task: CredScan@2
+#   inputs:
+#     toolMajorVersion: 'V2' 
+
+# - task: ESLint@1
+#   inputs:
+#     Configuration: 'recommended'
+#     TargetType: 'eslint'
+#     ErrorLevel: 'warn'
+
+# - task: Semmle@0
+#   env:
+#     SYSTEM_ACCESSTOKEN: $(System.AccessToken)
+#   inputs:
+#     sourceCodeDirectory: '$(Build.SourcesDirectory)'
+#     language: 'tsandjs'
+#     includeNodeModules: true
+#     querySuite: 'Recommended'
+#     timeout: '1800'
+#     ram: '16384'
+#     addProjectDirToScanningExclusionList: true
+
+# - task: Semmle@1
+#   inputs:
+#     sourceCodeDirectory: '$(Build.SourcesDirectory)'
+#     language: 'python'
+#     querySuite: 'Recommended'
+#     timeout: '1800'
+#     ram: '16384'
+#     addProjectDirToScanningExclusionList: true
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,41 @@
+# Read https://setuptools.pypa.io/en/latest/userguide/datafiles.html
+[build-system]
+requires = ["flit_core>=3.4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = "multilspy"
+version = "0.0.1"
+authors = [
+  { name="Lakshya A Agrawal", email="lakshya.aagrawal@gmail.com" },
+]
+description = "A language-agnostic LSP client in Python, with a library interface. Intended to be used to build applications around language servers. Currently multilspy supports language servers for Python, Rust, Java and C#. Originally appeared as part of Monitor-Guided Decoding (https://github.com/microsoft/monitors4codegen)"
+readme = "README.md"
+requires-python = ">=3.8"  # the pinned requests 2.32.3 and jedi-language-server 0.41.1 both require Python 3.8+
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+    "Development Status :: 2 - Pre-Alpha",
+    "Topic :: Software Development",
+    "Topic :: Text Editors :: Integrated Development Environments (IDE)",
+    "Programming Language :: C#",
+    "Programming Language :: Java",
+    "Programming Language :: Python",
+    "Programming Language :: Rust"
+]
+
+dependencies = [
+  "jedi-language-server==0.41.1",
+  "pydantic==1.10.5",
+  "requests==2.32.3"
+]
+
+[project.urls]
+"Homepage" = "https://github.com/microsoft/multilspy"
+"Bug Tracker" = "https://github.com/microsoft/multilspy/issues"
+
+[tool.setuptools]
+include-package-data = true
+
+[tool.setuptools.packages.find]
+where = ["src"]
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+jedi-language-server==0.41.1
+pytest==7.3.1
+pydantic==1.10.5
+pytest-asyncio==0.21.1
+requests==2.32.3
diff --git a/src/multilspy/__init__.py b/src/multilspy/__init__.py
@@ -0,0 +1,8 @@
+"""
+This module contains the multilspy API
+"""
+
+from . import multilspy_types as Types
+from .language_server import LanguageServer, SyncLanguageServer
+
+__all__ = ["LanguageServer", "Types", "SyncLanguageServer"]