Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explore modifying Zig's parser to fit the needs of a language server #1536

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/DocumentStore.zig
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const translate_c = @import("translate_c.zig");
const ComptimeInterpreter = @import("ComptimeInterpreter.zig");
const AstGen = std.zig.AstGen;
const Zir = std.zig.Zir;
const Parser = @import("stage2/Ast.zig");
const InternPool = @import("analyser/InternPool.zig");
const DocumentScope = @import("DocumentScope.zig");

Expand Down Expand Up @@ -486,8 +487,15 @@ pub const Handle = struct {
const tracy_zone_inner = tracy.traceNamed(@src(), "Ast.parse");
defer tracy_zone_inner.end();

var tree = try Ast.parse(allocator, new_text, .zig);
errdefer tree.deinit(allocator);
var zls_ast = try Parser.parse(allocator, new_text, .zig);
errdefer zls_ast.deinit(allocator);
var tree = Ast{
.source = zls_ast.source,
.tokens = zls_ast.tokens,
.nodes = zls_ast.nodes,
.extra_data = zls_ast.extra_data,
.errors = zls_ast.errors,
};

// remove unused capacity
var nodes = tree.nodes.toMultiArrayList();
Expand Down
92 changes: 92 additions & 0 deletions src/stage2/Ast.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
//! Abstract Syntax Tree for Zig source code.
//! For Zig syntax, the root node is at nodes[0] and contains the list of
//! sub-nodes.
//! For Zon syntax, the root node is at nodes[0] and contains lhs as the node
//! index of the main expression.

/// Reference to externally-owned data.
/// `deinit` does not free this slice.
source: [:0]const u8,

/// Tag and start offset of every token. When built by `parse`, the tokens are
/// in tokenizer order and the last one is `.eof`. Freed by `deinit`.
tokens: std.zig.Ast.TokenList.Slice,
/// The root AST node is assumed to be index 0. Since there can be no
/// references to the root node, this means 0 is available to indicate null.
/// Freed by `deinit`.
nodes: std.zig.Ast.NodeList.Slice,
/// Auxiliary list of node indices filled in by the parser. Freed by `deinit`.
/// NOTE(review): presumably indexed from node data for nodes that need more
/// than two operands — confirm against Parse.zig.
extra_data: []std.zig.Ast.Node.Index,

/// Errors collected by the parser while building this tree. Freed by `deinit`.
errors: []const std.zig.Ast.Error,

/// Frees every allocation owned by `tree` through `gpa` and then poisons the
/// whole struct so that any later use is caught as a read of `undefined`.
///
/// `tree.source` is left alone: it is only borrowed, never owned.
/// `gpa` should be the allocator the tree's slices were allocated with.
pub fn deinit(tree: *Ast, gpa: Allocator) void {
    // Plain slices first, then the multi-array slices; the order is
    // irrelevant because the four allocations are independent.
    gpa.free(tree.errors);
    gpa.free(tree.extra_data);
    tree.nodes.deinit(gpa);
    tree.tokens.deinit(gpa);

    tree.* = undefined;
}

/// Selects which grammar entry point `parse` uses.
pub const Mode = enum {
    /// Parse the source as Zig code; the root node holds the list of sub-nodes.
    zig,
    /// Parse the source as ZON; the root node's lhs is the main expression.
    zon,
};

/// Tokenizes `source` and parses the token stream into an `Ast`.
///
/// `mode` selects the grammar entry point: `.zig` runs `parseRoot`, `.zon`
/// runs `parseZon`. `source` is stored in the result by reference (it is not
/// copied), so it must stay alive for as long as the returned tree is used.
///
/// Fails only with `error.OutOfMemory`. On failure every list built by this
/// function is released again before returning.
///
/// Result should be freed with tree.deinit() when there are
/// no more references to any of the tokens or nodes.
pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!Ast {
    var tokens = std.zig.Ast.TokenList{};
    defer tokens.deinit(gpa);

    // Empirically, the zig std lib has an 8:1 ratio of source bytes to token count.
    const estimated_token_count = source.len / 8;
    try tokens.ensureTotalCapacity(gpa, estimated_token_count);

    var tokenizer = std.zig.Tokenizer.init(source);
    while (true) {
        const token = tokenizer.next();
        try tokens.append(gpa, .{
            .tag = token.tag,
            .start = @as(u32, @intCast(token.loc.start)),
        });
        // The eof token itself is recorded before the loop stops.
        if (token.tag == .eof) break;
    }

    var parser: Parse = .{
        .source = source,
        .gpa = gpa,
        .token_tags = tokens.items(.tag),
        .token_starts = tokens.items(.start),
        .errors = .{},
        .nodes = .{},
        .extra_data = .{},
        .scratch = .{},
        .tok_i = 0,
    };
    defer parser.errors.deinit(gpa);
    defer parser.nodes.deinit(gpa);
    defer parser.extra_data.deinit(gpa);
    defer parser.scratch.deinit(gpa);

    // Empirically, Zig source code has a 2:1 ratio of tokens to AST nodes.
    // Make sure at least 1 so we can use appendAssumeCapacity on the root node below.
    const estimated_node_count = (tokens.len + 2) / 2;
    try parser.nodes.ensureTotalCapacity(gpa, estimated_node_count);

    switch (mode) {
        .zig => try parser.parseRoot(),
        .zon => try parser.parseZon(),
    }

    // Do the fallible conversions first, each guarded by an errdefer. The
    // MultiArrayList conversions below cannot fail, so once we get past this
    // point nothing can be orphaned. Doing all four inside the return
    // expression would leak the already-converted slices if a later
    // `toOwnedSlice(gpa)` returned error.OutOfMemory, because the lists they
    // came from have been emptied and the `defer`s above no longer see them.
    const extra_data = try parser.extra_data.toOwnedSlice(gpa);
    errdefer gpa.free(extra_data);
    const errors = try parser.errors.toOwnedSlice(gpa);
    errdefer gpa.free(errors);

    // TODO experiment with compacting the MultiArrayList slices here
    return Ast{
        .source = source,
        .tokens = tokens.toOwnedSlice(),
        .nodes = parser.nodes.toOwnedSlice(),
        .extra_data = extra_data,
        .errors = errors,
    };
}

const std = @import("std");
const testing = std.testing;
const Ast = @This();
const Allocator = std.mem.Allocator;
const Parse = @import("Parse.zig");

// Reference every declaration of this file so that each one is analyzed when
// the file's tests are built.
test {
    testing.refAllDecls(Ast);
}
Loading