Skip to content

Commit

Permalink
Merge branch 'main' into exec-time-optimizations
Browse files Browse the repository at this point in the history
# Conflicts:
#	code-submissions-clustering-ij/build.gradle.kts
#	code-submissions-clustering-ij/src/main/kotlin/org/jetbrains/research/code/submissions/clustering/impl/unifiers/AbstractUnifier.kt
#	code-submissions-clustering-ij/src/main/kotlin/org/jetbrains/research/code/submissions/clustering/server/CodeServerImpl.kt
#	code-submissions-clustering-ij/src/main/kotlin/org/jetbrains/research/code/submissions/clustering/server/CodeServerStarter.kt
  • Loading branch information
eartser committed Sep 13, 2023
2 parents 525c086 + e65ed8a commit ddee839
Show file tree
Hide file tree
Showing 19 changed files with 170 additions and 55 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.gradle
/build/
*/build
*.log

# Ignore Gradle GUI config
gradle-app.setting
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ python3 -m src.server.start_ij_servers

The script will indicate when all configured IJ servers are ready to listen.

Optionally, you can pass a path to the server config file and a directory in which the server logs should be saved:

```
python3 -m src.server.start_ij_servers --config=</path/to/config/file> --logs_dir=</path/to/logs/dir>
```

## Getting clustering results

After the servers have started, configure and run `Run cluster command` run configuration to cluster submissions.
Expand Down
3 changes: 3 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ allprojects {
repositories {
mavenCentral()
maven("https://packages.jetbrains.team/maven/p/big-code/bigcode")
maven("https://packages.jetbrains.team/maven/p/bumblebee/bumblebee")
}

dependencies {
Expand All @@ -48,6 +49,8 @@ allprojects {
implementation(rootProject.libs.grpc.stub.kotlin)
implementation(rootProject.libs.kotlinx.coroutines.core)
implementation(rootProject.libs.kotlinx.serialization.json)
implementation(rootProject.libs.kotlin.logging)
implementation(rootProject.libs.logback)

implementation(rootProject.libs.plugin.utilities.core)
implementation(rootProject.libs.plugin.utilities.test)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package org.jetbrains.research.code.submissions.clustering.load.clustering.hac

import mu.KotlinLogging
import org.jetbrains.research.code.submissions.clustering.load.clustering.*
import org.jetbrains.research.code.submissions.clustering.util.IdentifierFactoryImpl
import org.jetbrains.research.code.submissions.clustering.util.parallel.ParallelContext
import org.jetbrains.research.code.submissions.clustering.util.parallel.ParallelUtils.combineWith
import org.jgrapht.Graph
import java.util.*
import java.util.function.Consumer
import java.util.logging.Logger
import kotlin.collections.set

/**
Expand All @@ -23,7 +23,7 @@ class GraphHierarchicalAgglomerativeClustering<V, E>(
private val distanceLimit: Double,
private val minClustersCount: Int = 1,
) : GraphClusterer<V, E> {
private val logger = Logger.getLogger(javaClass.name)
private val logger = KotlinLogging.logger { Unit }
private val heap: SortedSet<ClusterTriple> = TreeSet()
private val triples: MutableMap<Long, ClusterTriple> = HashMap()
private val clusters: MutableSet<Cluster<V>> = HashSet()
Expand Down Expand Up @@ -72,31 +72,31 @@ class GraphHierarchicalAgglomerativeClustering<V, E>(

@Suppress("TooGenericExceptionCaught")
override fun buildClustering(graph: Graph<V, E>): ClusteredGraph<V> {
logger.finer { "Clusterer initialization started" }
logger.debug { "Clusterer initialization started" }
init(graph)
logger.finer { "Clusterer initialization finished" }
logger.finer { "Clustering started" }
logger.debug { "Clusterer initialization finished" }
logger.debug { "Clustering started" }
while (heap.isNotEmpty() && clusters.size > minClustersCount) {
val minTriple: ClusterTriple = heap.first()
invalidateTriple(minTriple)
val first = minTriple.first
val second = minTriple.second
logger.fine {
logger.debug {
"""Merging clusters:
|$minTriple
""".trimMargin()
}
try {
mergeCommunities(first, second)
} catch (ex: Throwable) {
logger.severe {
logger.error {
"""Clusters merging error {$ex}:
|$minTriple
""".trimMargin()
}
}
}
logger.finer { "Clustering finished" }
logger.debug { "Clustering finished" }
if (clusters.size == 1) {
return buildClusteredGraph { add(clusters.first()) }
}
Expand Down
15 changes: 10 additions & 5 deletions code-submissions-clustering-ij/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@ version = rootProject.version

dependencies {
implementation(project(":code-submissions-clustering-core"))
implementation("org.jetbrains.research.ml.ast.transformations:ast-transformations-core") {
version {
branch = "master"
}
}
implementation(libs.ast.transformations.core)
implementation(libs.kotlin.argparser)
implementation(libs.zip4j)
implementation(libs.gumtreediff.core)
Expand Down Expand Up @@ -40,6 +36,13 @@ abstract class BaseCLITask : RunIdeTask() {
standardOutput = System.`out`
}

/**
 * Puts the `logs_dir` JVM system property in front of the JVM arguments of this task.
 *
 * @param logsDir value handed to the JVM as `-Dlogs_dir=<logsDir>`
 */
fun setLogsDir(logsDir: String) {
    val logsDirProperty = "-Dlogs_dir=$logsDir"
    // JVM arguments configured earlier are kept and follow the new property.
    jvmArgs = listOf(logsDirProperty) + jvmArgs.orEmpty()
}

fun setArgs(block: MutableList<String>.() -> Unit = {}) {
args = mutableListOf<String>().apply {
add(taskName.get())
Expand All @@ -61,11 +64,13 @@ tasks {
dependsOn(build)
val port: String? by project
val language: String? by project
val logsDir: String? by project
val transformationsConfig: String? by project

val pathToTransformationsConfig = transformationsConfig
?: "${project.projectDir}/src/main/resources/transformations-config.json"

setLogsDir(logsDir ?: project.parent!!.projectDir.toString())
setArgs {
port?.let { add("--port=$it") }
language?.let { add("--language=$it") }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import org.jetbrains.research.code.submissions.clustering.SubmissionsEdge
import org.jetbrains.research.code.submissions.clustering.model.Submission
import org.jetbrains.research.code.submissions.clustering.model.SubmissionsGraphAlias
import org.jetbrains.research.code.submissions.clustering.model.SubmissionsGraphEdge
import org.jetbrains.research.code.submissions.clustering.model.SubmissionsNode
import java.io.Closeable
import java.util.concurrent.TimeUnit

Expand All @@ -19,7 +20,7 @@ class CodeServerClientImpl(private val channel: ManagedChannel) : Closeable {
}

suspend fun unify(submission: Submission): Submission {
val request = submissionsCode(submission.code)
val request = submissionsCode(submission)
val unifiedSubmissionCode = stub.unify(request)
return submission.copy(code = unifiedSubmissionCode.code)
}
Expand All @@ -29,8 +30,8 @@ class CodeServerClientImpl(private val channel: ManagedChannel) : Closeable {
graph: SubmissionsGraphAlias,
): Int {
val request = submissionsEdge(
submissionsCode(graph.getEdgeSource(edge).code),
submissionsCode(graph.getEdgeTarget(edge).code)
submissionsCode(graph.getEdgeSource(edge)),
submissionsCode(graph.getEdgeTarget(edge))
)
return stub.calculateWeight(request).weight
}
Expand All @@ -39,10 +40,18 @@ class CodeServerClientImpl(private val channel: ManagedChannel) : Closeable {

/** Calls `clearDistMeasurer` on the server stub, passing an empty request message. */
suspend fun clearDistMeasurer() = stub.clearDistMeasurer(Empty.newBuilder().build())

private fun submissionsCode(code: String) =
private fun submissionsCode(submission: Submission) =
submissionsCode(submission.code, submission.stepId, submission.info.id)

/** Builds a [SubmissionCode] message from the code, step id and id of [submissionsNode]. */
private fun submissionsCode(submissionsNode: SubmissionsNode): SubmissionCode {
    val code = submissionsNode.code
    val stepId = submissionsNode.stepId
    val id = submissionsNode.id
    return submissionsCode(code, stepId, id)
}

/**
 * Assembles a [SubmissionCode] message from its individual fields.
 *
 * @param code source code stored in the message
 * @param stepId value stored in the message's `stepId` field
 * @param id value stored in the message's `id` field
 * @return the built message
 */
private fun submissionsCode(code: String, stepId: Int, id: Int): SubmissionCode {
    val messageBuilder = SubmissionCode.newBuilder()
    messageBuilder.setCode(code)
    messageBuilder.setStepId(stepId)
    messageBuilder.setId(id)
    return messageBuilder.build()
}

private fun submissionsEdge(from: SubmissionCode, to: SubmissionCode) =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jetbrains.research.code.submissions.clustering.impl.context.gumtree

import mu.KotlinLogging
import net.lingala.zip4j.ZipFile
import org.jetbrains.research.code.submissions.clustering.util.Command
import org.jetbrains.research.code.submissions.clustering.util.getTmpDirPath
Expand All @@ -10,14 +11,13 @@ import java.net.URL
import java.nio.file.Files
import java.nio.file.Paths
import java.nio.file.StandardCopyOption
import java.util.logging.Logger

/**
* [GumTreeParserUtil] is created for parser setup before using GumTree with python code.
* Also [checkSetup] should be called before running tests.
*/
object GumTreeParserUtil {
private val LOG = Logger.getLogger(javaClass.name)
private val LOG = KotlinLogging.logger { Unit }
private const val PYTHONPARSER_PROPERTY = "gt.pp.path"
private const val PARSER_REPO_ZIP_URL =
"https://github.com/JetBrains-Research/pythonparser/archive/master.zip"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import com.intellij.openapi.project.Project
import com.intellij.psi.PsiDocumentManager
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiFile
import mu.KotlinLogging
import io.ktor.utils.io.*
import org.jetbrains.research.code.submissions.clustering.impl.util.logging.TransformationsStatisticsBuilder
import org.jetbrains.research.code.submissions.clustering.impl.util.psi.PsiFileFactory
Expand All @@ -16,7 +17,6 @@ import org.jetbrains.research.code.submissions.clustering.load.unifiers.Unifier
import org.jetbrains.research.code.submissions.clustering.model.Language
import org.jetbrains.research.code.submissions.clustering.model.Submission
import org.jetbrains.research.ml.ast.transformations.Transformation
import java.util.logging.Logger

/**
* Abstract unifier producing unifying transformations over code submissions.
Expand All @@ -25,7 +25,8 @@ import java.util.logging.Logger
abstract class AbstractUnifier(
private val project: Project
) : Unifier {
private val logger = Logger.getLogger(javaClass.name)
private val logger = KotlinLogging.logger { Unit }
private val statisticsLogger = KotlinLogging.logger("TransformationsStatsLogger")
abstract val language: Language
abstract val singleRunTransformations: List<Transformation>
abstract val repeatingTransformations: List<Transformation>
Expand All @@ -37,7 +38,7 @@ abstract class AbstractUnifier(
statsBuilder: TransformationsStatisticsBuilder,
previousTree: PsiElement? = null,
) {
logger.fine { "Tree Started: ${this.text}" }
logger.debug { "Tree Started: ${this.text}" }

val psiDocumentManager = this.project.service<PsiDocumentManager>()
val document = psiDocumentManager.getDocument(this)
Expand All @@ -46,7 +47,7 @@ abstract class AbstractUnifier(
applyTransformation(it, this, document, psiDocumentManager, statsBuilder)
}
} catch (e: Throwable) {
logger.severe {
logger.error {
"""Transformation error {$e}:
|Previous Code=${previousTree?.text}
|Current Code=${this.text}
Expand All @@ -59,6 +60,8 @@ abstract class AbstractUnifier(
override suspend fun Submission.unify(): Submission {
val statsBuilder = TransformationsStatisticsBuilder()
skipTransformations = mutableSetOf()
statisticsLogger.info { "Unification: STEP_ID=$stepId ID=${info.id}" }
logger.debug { "Unification: STEP_ID=$stepId ID=${info.id}" }
val code = this.code.let { code ->
val psi = psiFileFactory.getPsiFile(code)
ApplicationManager.getApplication().invokeAndWait {
Expand All @@ -68,21 +71,21 @@ abstract class AbstractUnifier(
++iterationNumber
val previousTree = psi.copy()
psi.applyTransformations(repeatingTransformations, statsBuilder, previousTree)
logger.finer { "Previous text[$iterationNumber]:\n${previousTree.text}\n" }
logger.finer { "Current text[$iterationNumber]:\n${psi.text}\n\n" }
logger.debug { "Previous text[$iterationNumber]:\n${previousTree.text}\n" }
logger.debug { "Current text[$iterationNumber]:\n${psi.text}\n\n" }
} while (!previousTree.textMatches(psi.text) && iterationNumber <= MAX_ITERATIONS)
singleRunTransformations.forEach {
isFinishedWithTimeout(it, psi, statsBuilder)
}
logger.fine { "Tree Ended[[$iterationNumber]]: ${psi.text}\n\n\n" }
logger.info { "Total iterations number: $iterationNumber" }
logger.debug { "Tree Ended[[$iterationNumber]]: ${psi.text}\n\n\n" }
statisticsLogger.info { "Total iterations number: $iterationNumber" }
}
}
psi.reformatInWriteAction().text.also {
psiFileFactory.releasePsiFile(psi)
}
}
logger.info {
statisticsLogger.info {
statsBuilder.buildStatistics(singleRunTransformations + repeatingTransformations)
}
return this.copy(code = code)
Expand All @@ -95,11 +98,11 @@ abstract class AbstractUnifier(
psiDocumentManager: PsiDocumentManager,
statsBuilder: TransformationsStatisticsBuilder,
) {
logger.fine { "Transformation Started: ${transformation.key}" }
logger.debug { "Transformation Started: ${transformation.key}" }
if (!isFinishedWithTimeout(transformation, psiTree, statsBuilder)) {
logger.severe { "Transformation Skipped: ${transformation.key}" }
logger.warn { "Transformation Skipped: ${transformation.key}" }
} else {
logger.fine { "Transformation Ended: ${transformation.key}" }
logger.debug { "Transformation Ended: ${transformation.key}" }
document?.let {
psiDocumentManager.commitDocument(document)
}
Expand All @@ -116,7 +119,7 @@ abstract class AbstractUnifier(
} ?: run {
// Skip transformation with timeout in further iterations
skipTransformations.add(transformation)
logger.severe { "Timeout reached for ${transformation.key}" }
logger.warn { "Timeout reached for ${transformation.key}" }
return false
}
return true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import io.grpc.Server
import io.grpc.ServerBuilder
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.runBlocking
import mu.KotlinLogging
import org.jetbrains.research.code.submissions.clustering.SubmissionCode
import org.jetbrains.research.code.submissions.clustering.SubmissionsEdge
import org.jetbrains.research.code.submissions.clustering.SubmissionsWeight
Expand All @@ -13,10 +14,9 @@ import org.jetbrains.research.code.submissions.clustering.impl.unifiers.Transfor
import org.jetbrains.research.code.submissions.clustering.model.Language
import org.jetbrains.research.code.submissions.clustering.model.Submission
import org.jetbrains.research.code.submissions.clustering.model.SubmissionInfo
import java.util.logging.Logger

class CodeServerImpl(private val port: Int, language: Language, transformationsConfig: TransformationsConfig) {
private val logger: Logger = Logger.getLogger(javaClass.name)
private val logger = KotlinLogging.logger { Unit }
private val graphContext = GumTreeGraphContextBuilder()
.setLanguage(language)
.configureTransformations(transformationsConfig)
Expand Down Expand Up @@ -50,7 +50,7 @@ class CodeServerImpl(private val port: Int, language: Language, transformationsC
private suspend fun unifyImpl(request: SubmissionCode): SubmissionCode {
logger.info("Receive request: \n${request.code}")
val code = graphContext.unifier.run {
createMockSubmission(request.code).unify().code
createMockSubmission(request.code, request.id, request.stepId).unify().code
}
logger.info("Unification finished")
return SubmissionCode.newBuilder().setCode(code).build()
Expand All @@ -74,5 +74,6 @@ class CodeServerImpl(private val port: Int, language: Language, transformationsC
return SubmissionsWeight.newBuilder().setWeight(weight).build()
}

private fun createMockSubmission(code: String) = Submission(SubmissionInfo(0, 0), 0, code)
/**
 * Creates a [Submission] that carries [code].
 *
 * [id] is passed as the first argument of [SubmissionInfo] (its second argument is always 0),
 * and [stepId] is passed as the second argument of [Submission]. Both default to 0.
 */
private fun createMockSubmission(code: String, id: Int = 0, stepId: Int = 0) =
Submission(SubmissionInfo(id, 0), stepId, code)
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package org.jetbrains.research.code.submissions.clustering.server

import kotlinx.coroutines.channels.Channel
import mu.KotlinLogging
import org.jetbrains.research.code.submissions.clustering.*
import java.util.logging.Logger

class CodeServerServiceImpl(
private val requestChannel: Channel<CodeServerRequest>,
private val responseChannel: Channel<CodeServerResponse>
) : CodeServerGrpcKt.CodeServerCoroutineImplBase() {
private val logger: Logger = Logger.getLogger(javaClass.name)
private val logger = KotlinLogging.logger { Unit }

override suspend fun unify(request: SubmissionCode): SubmissionCode {
requestChannel.send(UnifyRequest(request))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.decodeFromStream
import org.jetbrains.research.code.submissions.clustering.impl.unifiers.TransformationsConfig
import mu.KotlinLogging
import org.jetbrains.research.code.submissions.clustering.model.Language
import java.nio.file.Paths
import java.util.logging.Logger
import kotlin.system.exitProcess

@Suppress("TooGenericExceptionCaught")
class CodeServerStarter : ApplicationStarter {
private val logger: Logger = Logger.getLogger(javaClass.name)
private val logger = KotlinLogging.logger { Unit }
private var portId: Int = BASE_PORT
override val commandName: String = "ij-code-server"
private lateinit var lang: Language
Expand All @@ -27,7 +27,7 @@ class CodeServerStarter : ApplicationStarter {
val server = CodeServerImpl(portId, lang, transformationsCfg)
server.start()
} catch (ex: Throwable) {
logger.severe { ex.stackTraceToString() }
logger.error { ex.stackTraceToString() }
exitProcess(1)
} finally {
exitProcess(0)
Expand Down
Loading

0 comments on commit ddee839

Please sign in to comment.