// PyprojectPoetryParser.kt
package com.depanalyzer.parser.python
import com.depanalyzer.parser.DependencySection
import com.depanalyzer.parser.Ecosystem
import com.depanalyzer.parser.ParsedDependency
import java.io.File
/**
 * Extracts Python dependencies from a `pyproject.toml` file.
 *
 * Two declaration styles are recognised:
 *  - Poetry tables: `[tool.poetry.dependencies]`, `[tool.poetry.group.<name>.dependencies]`
 *    and the legacy `[tool.poetry.dev-dependencies]`;
 *  - the PEP 621 `dependencies` array of the `[project]` table.
 *
 * This is a lightweight line scanner, not a full TOML parser. It understands single-line
 * quoted strings and `#` comments, which is enough for the dependency declarations above;
 * multi-line (`"""`) strings and dotted keys are not interpreted.
 */
class PyprojectPoetryParser {

    /**
     * Parses [pyprojectFile] and returns every dependency found in it.
     *
     * Poetry entries come first, followed by PEP 621 entries. When the same package is
     * declared more than once, only the first occurrence is kept.
     *
     * @param pyprojectFile an existing regular file named exactly `pyproject.toml`.
     * @return the dependencies, de-duplicated by `groupId:artifactId`.
     * @throws IllegalArgumentException if the file does not exist, is not a regular file,
     *   or is not named `pyproject.toml`.
     */
    fun parse(pyprojectFile: File): List<ParsedDependency> {
        require(pyprojectFile.exists() && pyprojectFile.isFile) {
            "Invalid pyproject.toml path: ${pyprojectFile.absolutePath}"
        }
        require(pyprojectFile.name == "pyproject.toml") { "Expected pyproject.toml, got ${pyprojectFile.name}" }

        // Read the file once and strip comments up front so both passes see the same text.
        val lines = pyprojectFile.readLines().map { stripComment(it).trim() }
        val dependencies = parsePoetryDependencies(lines) + parsePep621Dependencies(lines)
        return dependencies.distinctBy { "${it.groupId}:${it.artifactId}" }
    }

    /**
     * Collects `name = <constraint>` entries from the Poetry dependency tables.
     *
     * @param lines comment-free, trimmed lines of the file.
     */
    private fun parsePoetryDependencies(lines: List<String>): List<ParsedDependency> {
        val result = mutableListOf<ParsedDependency>()
        var section = ""
        for (line in lines) {
            if (line.isBlank()) continue
            if (isTableHeader(line)) {
                section = tableName(line)
                continue
            }
            val scope = poetryScope(section) ?: continue
            // Lines without a key (e.g. the closing `]` of a multi-line value) declare nothing.
            if ('=' !in line) continue

            val name = line.substringBefore('=').trim().trim('"', '\'')
            // The name check also rejects continuation lines such as `{version = "1.0", ...},`.
            if (!PACKAGE_NAME_REGEX.matches(name)) continue
            // `python` is the interpreter constraint, not a package.
            if (name.equals("python", ignoreCase = true)) continue

            result += pypiDependency(name, extractPoetryVersion(line.substringAfter('=')), scope)
        }
        return result
    }

    /**
     * Maps a table name to the dependency scope it declares, or `null` when the table is
     * not a Poetry dependency table.
     */
    private fun poetryScope(section: String): String? = when {
        section == "tool.poetry.dependencies" -> MAIN_SCOPE
        // Pre-1.2 Poetry spelling of what is now the "dev" group.
        section == "tool.poetry.dev-dependencies" -> "dev"
        section.startsWith(GROUP_PREFIX) && section.endsWith(GROUP_SUFFIX) ->
            section.removePrefix(GROUP_PREFIX).removeSuffix(GROUP_SUFFIX)
        else -> null
    }

    /**
     * Collects the requirement strings of `[project].dependencies`.
     *
     * @param lines comment-free, trimmed lines of the file.
     */
    private fun parsePep621Dependencies(lines: List<String>): List<ParsedDependency> {
        val arrayBody = extractProjectDependenciesArray(lines) ?: return emptyList()
        return QUOTED_ITEM_REGEX.findAll(arrayBody)
            .mapNotNull { item ->
                // Group 1 holds a double-quoted item, group 2 a single-quoted one.
                toPep621Dependency(item.groupValues[1].ifEmpty { item.groupValues[2] })
            }
            .toList()
    }

    /**
     * Returns the text between the brackets of the `dependencies` array inside the
     * `[project]` table, or `null` when the key is absent, is not an array, or the array
     * is never closed.
     */
    private fun extractProjectDependenciesArray(lines: List<String>): String? {
        var inProjectTable = false
        for ((index, line) in lines.withIndex()) {
            if (isTableHeader(line)) {
                inProjectTable = tableName(line) == "project"
                continue
            }
            if (!inProjectTable || '=' !in line) continue

            // Exact key match so `optional-dependencies` is not mistaken for `dependencies`.
            val key = line.substringBefore('=').trim().trim('"', '\'')
            if (key != "dependencies") continue

            val firstChunk = line.substringAfter('=').trim()
            if (!firstChunk.startsWith("[")) return null
            // The array may span several lines; join the remainder and find the matching bracket.
            val remainder = sequenceOf(firstChunk) + lines.asSequence().drop(index + 1)
            return arrayInterior(remainder.joinToString("\n"))
        }
        return null
    }

    /**
     * Given [text] starting with `[`, returns what lies between that bracket and its
     * matching `]`. Brackets inside quoted strings (e.g. extras such as `pkg[extra]`) are
     * ignored. Returns `null` when no matching bracket exists.
     */
    private fun arrayInterior(text: String): String? {
        var depth = 0
        val closing = firstUnquotedIndex(text) { ch ->
            when (ch) {
                '[' -> {
                    depth++
                    false
                }
                ']' -> --depth == 0
                else -> false
            }
        }
        return if (closing > 0) text.substring(1, closing) else null
    }

    /**
     * Converts one PEP 508 requirement string (`name[extras] specifier ; marker`) into a
     * dependency. Extras and environment markers are dropped. Direct references
     * (`name @ url`) yield a `null` version because they carry no version specifier.
     *
     * @return the dependency, or `null` when the string does not start with a valid name.
     */
    private fun toPep621Dependency(declaration: String): ParsedDependency? {
        val requirement = declaration.substringBefore(';').trim()
        val match = REQUIREMENT_REGEX.matchEntire(requirement) ?: return null
        val name = match.groupValues[1]
        val version = match.groupValues[2].trim()
            .takeUnless { it.startsWith("@") }
            ?.removeSurrounding("(", ")")
            ?.trim()
            ?.takeIf { it.isNotBlank() }
        return pypiDependency(name, version, MAIN_SCOPE)
    }

    /**
     * Extracts the version constraint from the right-hand side of a Poetry entry.
     *
     * A quoted value (`"^1.2"`) is returned without its quotes; for any other value
     * (typically an inline table) the `version = "..."` key is looked up.
     *
     * @return the constraint, or `null` when none is present.
     */
    private fun extractPoetryVersion(rawValue: String): String? {
        val value = rawValue.trim()
        if (value.isEmpty()) return null
        if (value.startsWith("\"") || value.startsWith("'")) {
            return value.trim('"', '\'').takeIf { it.isNotBlank() }
        }
        return INLINE_VERSION_REGEX.find(value)?.groupValues?.get(1)?.trim()?.takeIf { it.isNotBlank() }
    }

    /** Lower-cases [raw] and replaces underscores with hyphens. */
    private fun normalizePackageName(raw: String): String {
        return raw.trim().lowercase().replace('_', '-')
    }

    /** Builds a PyPI dependency record with the normalised form of [name]. */
    private fun pypiDependency(name: String, version: String?, scope: String): ParsedDependency {
        return ParsedDependency(
            groupId = "pypi",
            artifactId = normalizePackageName(name),
            version = version,
            scope = scope,
            section = DependencySection.DEPENDENCIES,
            ecosystem = Ecosystem.PYPI
        )
    }

    /** True when [line] has the shape of a table header such as `[tool.poetry.dependencies]`. */
    private fun isTableHeader(line: String): Boolean = line.startsWith("[") && line.endsWith("]")

    /** Returns the name inside a table header's brackets. */
    private fun tableName(header: String): String = header.removePrefix("[").removeSuffix("]").trim()

    /** Removes a trailing `#` comment, leaving `#` characters inside quoted strings intact. */
    private fun stripComment(rawLine: String): String {
        val hash = firstUnquotedIndex(rawLine) { it == '#' }
        return if (hash >= 0) rawLine.substring(0, hash) else rawLine
    }

    /**
     * Returns the index of the first character outside any quoted string for which
     * [predicate] returns true, or `-1` if there is none. Quote characters themselves are
     * never passed to [predicate]. Backslash escapes are honoured inside double quotes only.
     */
    private fun firstUnquotedIndex(text: String, predicate: (Char) -> Boolean): Int {
        var quote: Char? = null
        var escaped = false
        for (index in text.indices) {
            val ch = text[index]
            if (quote == null) {
                if (ch == '"' || ch == '\'') {
                    quote = ch
                } else if (predicate(ch)) {
                    return index
                }
            } else if (escaped) {
                escaped = false
            } else if (ch == '\\' && quote == '"') {
                escaped = true
            } else if (ch == quote) {
                quote = null
            }
        }
        return -1
    }

    private companion object {
        const val MAIN_SCOPE = "main"
        const val GROUP_PREFIX = "tool.poetry.group."
        const val GROUP_SUFFIX = ".dependencies"

        /** Distribution name: alphanumerics with `.`, `_`, `-` allowed in between. */
        const val NAME_PATTERN = """[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?"""

        val PACKAGE_NAME_REGEX = Regex(NAME_PATTERN)

        /** Group 1: name; optional `[extras]` skipped; group 2: everything after them. */
        val REQUIREMENT_REGEX = Regex("""($NAME_PATTERN)\s*(?:\[[^\]]*])?\s*(.*)""")

        /** A double-quoted string (group 1, escapes allowed) or a single-quoted one (group 2). */
        val QUOTED_ITEM_REGEX = Regex("""\"((?:[^\"\\]|\\.)*)\"|'([^']*)'""")

        /** The `version = "..."` key of a Poetry inline table. */
        val INLINE_VERSION_REGEX = Regex("""\bversion\s*=\s*['\"]([^'\"]+)['\"]""")
    }
}