Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions go/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,19 @@ codeql_pkg_files(
prefix = "tools/{CODEQL_PLATFORM}",
)

codeql_pkg_files(
name = "canonicalize-dll",
srcs = select({
"@platforms//os:windows": ["//shared/canonicalize:pkg"],
"//conditions:default": [],
}),
prefix = "tools/{CODEQL_PLATFORM}",
)

codeql_pack(
name = "go",
srcs = [
":canonicalize-dll",
":extractor-pack-arch",
":resources",
"//go/codeql-tools",
Expand Down
1 change: 1 addition & 0 deletions go/extractor/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ go_library(
importpath = "github.com/github/codeql-go/extractor",
visibility = ["//visibility:public"],
deps = [
"//go/extractor/canonicalize",
"//go/extractor/dbscheme",
"//go/extractor/diagnostics",
"//go/extractor/srcarchive",
Expand Down
11 changes: 11 additions & 0 deletions go/extractor/canonicalize/BUILD.bazel

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions go/extractor/canonicalize/canonicalize_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//go:build !windows

package canonicalize

func CanonicalizePath(path string) string { return path }
65 changes: 65 additions & 0 deletions go/extractor/canonicalize/canonicalize_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//go:build windows

package canonicalize

import (
"os"
"path/filepath"
"syscall"
"unsafe"
)

var (
dll *syscall.DLL
procCanonicalize *syscall.Proc
procFree *syscall.Proc
available bool
)

func init() {
root := os.Getenv("CODEQL_EXTRACTOR_GO_ROOT")
if root == "" {
return
}
dllPath := filepath.Join(root, "tools", "win64", "codeql_canonical_path.dll")
d, err := syscall.LoadDLL(dllPath)
if err != nil {
return
}
p, err := d.FindProc("canonicalize_path_u8")
if err != nil {
return
}
f, _ := d.FindProc("canonicalize_free_u8")
dll = d
procCanonicalize = p
procFree = f
available = true
}

func CanonicalizePath(path string) string {
if !available {
return path
}
pathBytes := append([]byte(path), 0)
ret, _, _ := procCanonicalize.Call(uintptr(unsafe.Pointer(&pathBytes[0])))
if ret == 0 {
return path
}
result := bytePtrToString((*byte)(unsafe.Pointer(ret)))
if procFree != nil {
procFree.Call(ret)
}
return result
}

func bytePtrToString(p *byte) string {
if p == nil {
return ""
}
var n int
for ptr := unsafe.Pointer(p); *(*byte)(ptr) != 0; n++ {
ptr = unsafe.Add(ptr, 1)
}
return string(unsafe.Slice(p, n))
}
3 changes: 2 additions & 1 deletion go/extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"sync"
"time"

"github.com/github/codeql-go/extractor/canonicalize"
"github.com/github/codeql-go/extractor/dbscheme"
"github.com/github/codeql-go/extractor/diagnostics"
"github.com/github/codeql-go/extractor/srcarchive"
Expand Down Expand Up @@ -766,7 +767,7 @@ func normalizedPath(ast *ast.File, fset *token.FileSet) string {
if err != nil {
return file
}
return path
return canonicalize.CanonicalizePath(path)
}

// extractFile extracts AST information for the given file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1242,11 +1242,11 @@ public static String relativePathLink (File f, File base)
public static File tryMakeCanonical (File f)
{
try {
return f.getCanonicalFile();
return NativeCanonicalizer.resolve(f.getCanonicalFile());
}
catch (IOException ignored) {
Exceptions.ignore(ignored, "Can't log error: Could be too verbose.");
return new File(simplifyPath(f));
return NativeCanonicalizer.resolve(new File(simplifyPath(f)));
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.semmle.util.files;

import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;

public class NativeCanonicalizer {
private static final boolean available;

static {
boolean loaded = false;
if (File.separatorChar == '\\') {
String dist = System.getenv("CODEQL_DIST");
if (dist != null && !dist.isEmpty()) {
try {
Path library = Paths.get(dist).resolve("tools").resolve("win64")
.resolve("codeql_canonical_path.dll").toAbsolutePath();
System.load(library.toString());
loaded = true;
} catch (RuntimeException | UnsatisfiedLinkError ignored) {
}
}
}
available = loaded;
}

private NativeCanonicalizer() {}

// UTF-16 JNI interface - no encoding conversion
private static native String nativeCanonicalizePath(String path);

public static File resolve(File path) {
if (!available) return path;
String result = nativeCanonicalizePath(path.getAbsolutePath());
return result != null ? new File(result) : path;
}

public static boolean isAvailable() {
return available;
}
}
35 changes: 35 additions & 0 deletions shared/canonicalize/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_files")

cc_binary(
name = "codeql_canonical_path.dll",
srcs = [
"canonicalize.cpp",
"canonicalize.h",
"canonicalize_jni.cpp",
],
defines = ["CODEQL_CANONICALIZE_EXPORTS"],
linkopts = ["-lkernel32"],
linkshared = True,
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
deps = ["@rules_java//toolchains:jni"],
)

cc_library(
name = "canonicalize",
srcs = ["canonicalize.cpp"],
hdrs = ["canonicalize.h"],
defines = ["CODEQL_CANONICALIZE_EXPORTS"],
linkopts = ["-lkernel32"],
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
)

pkg_files(
name = "pkg",
srcs = [":codeql_canonical_path.dll"],
attributes = pkg_attributes(mode = "0755"),
target_compatible_with = ["@platforms//os:windows"],
visibility = ["//visibility:public"],
)
165 changes: 165 additions & 0 deletions shared/canonicalize/canonicalize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#ifdef _WIN32
#include "canonicalize.h"
#include <windows.h>
#include <string>
#include <unordered_map>
#include <shared_mutex>
#include <random>

namespace {

class PathCache {
public:
static PathCache& instance() {
static PathCache cache;
return cache;
}

const wchar_t* canonicalize(const wchar_t* path) {
std::wstring key(path);

// Fast path: shared (read) lock for cache hit
{
std::shared_lock lock(mutex_);
auto it = cache_.find(key);
if (it != cache_.end()) {
return _wcsdup(it->second.c_str());
}
}

// Slow path: resolve and insert under exclusive lock
std::wstring resolved = resolve(path);
if (resolved.empty()) return nullptr;

std::unique_lock lock(mutex_);
// Check again under exclusive lock (another thread may have inserted)
auto it = cache_.find(key);
if (it != cache_.end()) {
return _wcsdup(it->second.c_str());
}

// Evict a random entry if at capacity (matches C# strategy)
if (cache_.size() >= max_capacity_) {
std::uniform_int_distribution<size_t> dist(0, cache_.size() - 1);
auto evict = cache_.begin();
std::advance(evict, dist(rng_));
cache_.erase(evict);
}

auto inserted = cache_.emplace(std::move(key), std::move(resolved)).first;
return _wcsdup(inserted->second.c_str());
}

private:
PathCache() = default;

static constexpr size_t max_capacity_ = 4096;
std::unordered_map<std::wstring, std::wstring> cache_;
std::shared_mutex mutex_;
std::mt19937 rng_{std::random_device{}()};

static std::wstring resolve(const wchar_t* path) {
HANDLE h = CreateFileW(
path,
0,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
nullptr,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
nullptr);

if (h == INVALID_HANDLE_VALUE) {
return resolve_nonexistent(path);
}

std::wstring result = get_final_path(h);
CloseHandle(h);

if (result.empty()) return {};
return strip_prefix(result);
}

static std::wstring get_final_path(HANDLE h) {
wchar_t buf[MAX_PATH];
DWORD len = GetFinalPathNameByHandleW(h, buf, MAX_PATH, FILE_NAME_NORMALIZED);

if (len > 0 && len < MAX_PATH) {
return std::wstring(buf, len);
}
if (len >= MAX_PATH) {
std::wstring big(len + 1, L'\0');
len = GetFinalPathNameByHandleW(h, big.data(), len + 1, FILE_NAME_NORMALIZED);
if (len > 0) return std::wstring(big.data(), len);
}
return {};
}

static std::wstring strip_prefix(const std::wstring& path) {
constexpr std::wstring_view unc_prefix = L"\\\\?\\UNC\\";
constexpr std::wstring_view lp_prefix = L"\\\\?\\";

if (path.starts_with(unc_prefix)) {
return L"\\" + path.substr(unc_prefix.size() - 1);
}
if (path.starts_with(lp_prefix)) {
return std::wstring(path.substr(lp_prefix.size()));
}
return path;
}

// For non-existent files: canonicalize parent, append filename
// (matches C#'s ConstructCanonicalPath)
static std::wstring resolve_nonexistent(const wchar_t* path) {
std::wstring spath(path);
auto sep = spath.find_last_of(L"\\/");
if (sep == std::wstring::npos) return {};

std::wstring parent = spath.substr(0, sep);
std::wstring name = spath.substr(sep + 1);

std::wstring canonical_parent = resolve(parent.c_str());
if (canonical_parent.empty()) return {};

return canonical_parent + L"\\" + name;
}
};

} // namespace

extern "C" {

CODEQL_API const wchar_t* canonicalize_path_w(const wchar_t* path) {
if (!path || !*path) return nullptr;
return PathCache::instance().canonicalize(path);
}

CODEQL_API void canonicalize_free_w(const wchar_t* path) {
free(const_cast<wchar_t*>(path));
}

CODEQL_API const char* canonicalize_path_u8(const char* path) {
if (!path || !*path) return nullptr;

int wlen = MultiByteToWideChar(CP_UTF8, 0, path, -1, nullptr, 0);
if (wlen <= 0) return nullptr;
std::wstring wpath(wlen - 1, L'\0');
MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath.data(), wlen);

const wchar_t* wresult = PathCache::instance().canonicalize(wpath.c_str());
if (!wresult) return nullptr;

int ulen = WideCharToMultiByte(CP_UTF8, 0, wresult, -1, nullptr, 0, nullptr, nullptr);
if (ulen <= 0) { free(const_cast<wchar_t*>(wresult)); return nullptr; }
char* result = static_cast<char*>(malloc(ulen));
WideCharToMultiByte(CP_UTF8, 0, wresult, -1, result, ulen, nullptr, nullptr);
free(const_cast<wchar_t*>(wresult));

return result;
}

CODEQL_API void canonicalize_free_u8(const char* path) {
free(const_cast<char*>(path));
}

} // extern "C"
#endif
Loading
Loading