Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions Analyzer/Resources/Init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,38 @@ CREATE TABLE IF NOT EXISTS objects
PRIMARY KEY (id)
);

-- Deduplicated lookup tables for the strings referenced by the refs table.
-- refs stores ids into these instead of repeating the strings on every row.
-- Lookup table of property path strings; refs_view joins refs.property_path to id.
CREATE TABLE IF NOT EXISTS property_names
(
id INTEGER PRIMARY KEY, -- value stored in refs.property_path
name TEXT -- the property path string itself
);

-- Lookup table of property type strings; refs_view joins refs.property_type to id.
CREATE TABLE IF NOT EXISTS property_types
(
id INTEGER PRIMARY KEY, -- value stored in refs.property_type
name TEXT -- the property type string itself
);

CREATE TABLE IF NOT EXISTS refs
(
object INTEGER,
referenced_object INTEGER,
property_path TEXT,
property_type TEXT
property_path INTEGER,
property_type INTEGER
);

-- Reproduces the pre-normalization refs shape (property_path/property_type as text)
-- so queries can read the strings without joining the lookup tables by hand.
-- INNER JOIN: every refs row is written with both ids present and their lookup rows
-- inserted in the same transaction, so the joins always match (the ids act as foreign
-- keys into the lookup tables, although no FOREIGN KEY constraint is declared).
CREATE VIEW refs_view AS
SELECT
    r.object,
    r.referenced_object,
    names.name AS property_path, -- string looked up from property_names
    kinds.name AS property_type  -- string looked up from property_types
FROM refs AS r
INNER JOIN property_names AS names ON names.id = r.property_path
INNER JOIN property_types AS kinds ON kinds.id = r.property_type;

CREATE VIEW object_view AS
SELECT o.id, o.object_id, ab.name AS asset_bundle, sf.name AS serialized_file, t.name AS type, o.name, o.game_object, o.size,
CASE
Expand Down Expand Up @@ -89,19 +113,24 @@ ORDER BY size DESC, instances DESC;
CREATE VIEW view_material_shader_refs AS
SELECT m.id material_id, m.name material_name, a.name material_path, m.asset_bundle material_asset_bundle, s.id shader_id, s.name shader_name, s.asset_bundle shader_asset_bundle
FROM object_view m
INNER JOIN refs r ON m.id = r.object AND r.property_path = 'm_Shader'
INNER JOIN refs_view r ON m.id = r.object AND r.property_path = 'm_Shader'
INNER JOIN object_view s ON r.referenced_object = s.id
LEFT JOIN assets a ON m.id = a.object;

CREATE VIEW view_material_texture_refs AS
SELECT m.id material_id, m.name material_name, a.name material_path, m.asset_bundle material_asset_bundle, t.id texture_id, t.name texture_name, t.asset_bundle texture_asset_bundle
FROM object_view m
INNER JOIN refs r ON r.object = m.id AND property_type = "Texture"
INNER JOIN refs_view r ON r.object = m.id AND property_type = 'Texture'
INNER JOIN object_view t ON r.referenced_object = t.id
LEFT JOIN assets a ON m.id = a.object
WHERE m.type = "Material";
WHERE m.type = 'Material';

INSERT INTO types (id, name) VALUES (-1, 'Scene');

-- Database schema version. Bump when the schema changes in a way that tools relying on it
-- (e.g. find-refs) cannot read from an older database. 1 = normalized refs table (issue #44);
-- databases produced before versioning report 0.
PRAGMA user_version = 1;

PRAGMA synchronous = OFF;
PRAGMA journal_mode = MEMORY;
2 changes: 1 addition & 1 deletion Analyzer/Resources/MonoScript.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ SELECT
mb.name,
mb.size
FROM object_view mb
INNER JOIN refs r ON mb.id = r.object
INNER JOIN refs_view r ON mb.id = r.object
INNER JOIN monoscript_view ms ON r.referenced_object = ms.id
WHERE mb.type = 'MonoBehaviour' AND r.property_type = 'MonoScript';
27 changes: 27 additions & 0 deletions Analyzer/SQLite/Commands/SerializedFile/AddPropertyName.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using System.Collections.Generic;
using Microsoft.Data.Sqlite;
using UnityDataTools.Analyzer.SQLite.Commands;

namespace UnityDataTools.Analyzer.SQLite.Commands.SerializedFile
{
    /* TABLE DEFINITION:
    create table property_names
    (
        id INTEGER,
        name TEXT,
        PRIMARY KEY (id)
    );
    */

    /// <summary>
    /// Command description for the <c>property_names</c> table: an integer
    /// <c>id</c> column and a text <c>name</c> column.
    /// </summary>
    internal class AddPropertyName : AbstractCommand
    {
        /// <summary>Name of the table this command targets.</summary>
        protected override string TableName
        {
            get { return "property_names"; }
        }

        /// <summary>Always <c>null</c> for this command.</summary>
        protected override string DDLSource
        {
            get { return null; }
        }

        /// <summary>
        /// Column names mapped to their SQLite types. A new dictionary is built on every access.
        /// </summary>
        protected override Dictionary<string, SqliteType> Fields
        {
            get
            {
                var columns = new Dictionary<string, SqliteType>();
                columns.Add("id", SqliteType.Integer);
                columns.Add("name", SqliteType.Text);
                return columns;
            }
        }
    }
}
27 changes: 27 additions & 0 deletions Analyzer/SQLite/Commands/SerializedFile/AddPropertyType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using System.Collections.Generic;
using Microsoft.Data.Sqlite;
using UnityDataTools.Analyzer.SQLite.Commands;

namespace UnityDataTools.Analyzer.SQLite.Commands.SerializedFile
{
    /* TABLE DEFINITION:
    create table property_types
    (
        id INTEGER,
        name TEXT,
        PRIMARY KEY (id)
    );
    */

    /// <summary>
    /// Command description for the <c>property_types</c> table: an integer
    /// <c>id</c> column and a text <c>name</c> column.
    /// </summary>
    internal class AddPropertyType : AbstractCommand
    {
        /// <summary>Name of the table this command targets.</summary>
        protected override string TableName
        {
            get { return "property_types"; }
        }

        /// <summary>Always <c>null</c> for this command.</summary>
        protected override string DDLSource
        {
            get { return null; }
        }

        /// <summary>
        /// Column names mapped to their SQLite types. A new dictionary is built on every access.
        /// </summary>
        protected override Dictionary<string, SqliteType> Fields
        {
            get
            {
                var columns = new Dictionary<string, SqliteType>();
                columns.Add("id", SqliteType.Integer);
                columns.Add("name", SqliteType.Text);
                return columns;
            }
        }
    }
}
11 changes: 5 additions & 6 deletions Analyzer/SQLite/Commands/SerializedFile/AddReference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@ namespace UnityDataTools.Analyzer.SQLite.Commands.SerializedFile
create table refs
(
object INTEGER,
referenced_object INTEGER,
property_path TEXT,
property_type TEXT,
PRIMARY KEY (object, referenced_object, property_path)
referenced_object INTEGER,
property_path INTEGER, -- id into property_names
property_type INTEGER -- id into property_types
);
*/
internal class AddReference : AbstractCommand
Expand All @@ -24,8 +23,8 @@ internal class AddReference : AbstractCommand
{
{ "object", SqliteType.Integer },
{ "referenced_object", SqliteType.Integer },
{ "property_path", SqliteType.Text },
{ "property_type", SqliteType.Text }
{ "property_path", SqliteType.Integer },
{ "property_type", SqliteType.Integer }
};
}
}
49 changes: 47 additions & 2 deletions Analyzer/SQLite/Writers/SerializedFileSQLiteWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ public class SerializedFileSQLiteWriter : IDisposable
private IdProvider<string> m_SerializedFileIdProvider = new();
private ObjectIdProvider m_ObjectIdProvider = new();

// The refs table stores ids into these deduplicated string tables instead of repeating the
// property path/type strings on every row. Ids are assigned lazily and are global across all
// files; the HashSets track which ids have already had their lookup row written.
private IdProvider<string> m_PropertyPathIdProvider = new();
private IdProvider<string> m_PropertyTypeIdProvider = new();
private HashSet<int> m_PropertyPathSet = new();
private HashSet<int> m_PropertyTypeSet = new();

private Regex m_RegexSceneFile = new(@"BuildPlayer-([^\.]+)(?:\.sharedAssets)?");

// Used to map PPtr fileId to its corresponding serialized file id in the database.
Expand All @@ -46,6 +54,8 @@ public class SerializedFileSQLiteWriter : IDisposable

// serialized files
private AddReference m_AddReferenceCommand = new AddReference();
private AddPropertyName m_AddPropertyNameCommand = new AddPropertyName();
private AddPropertyType m_AddPropertyTypeCommand = new AddPropertyType();
private AddAssetBundle m_AddAssetBundleCommand = new AddAssetBundle();
private AddSerializedFile m_AddSerializedFileCommand = new AddSerializedFile();
private AddObject m_AddObjectCommand = new AddObject();
Expand Down Expand Up @@ -82,6 +92,8 @@ private void CreateSQLiteCommands()

// build serialized file commands
m_AddReferenceCommand.CreateCommand(m_Database);
m_AddPropertyNameCommand.CreateCommand(m_Database);
m_AddPropertyTypeCommand.CreateCommand(m_Database);
m_AddAssetBundleCommand.CreateCommand(m_Database);
m_AddSerializedFileCommand.CreateCommand(m_Database);
m_AddObjectCommand.CreateCommand(m_Database);
Expand Down Expand Up @@ -289,17 +301,48 @@ private int AddReference(long objectId, int fileId, long pathId, string property

if (!m_SkipReferences)
{
var propertyPathId = GetPropertyPathId(propertyPath);
var propertyTypeId = GetPropertyTypeId(propertyType);

m_AddReferenceCommand.SetTransaction(m_CurrentTransaction);
m_AddReferenceCommand.SetValue("object", objectId);
m_AddReferenceCommand.SetValue("referenced_object", referencedObjectId);
m_AddReferenceCommand.SetValue("property_path", propertyPath);
m_AddReferenceCommand.SetValue("property_type", propertyType);
m_AddReferenceCommand.SetValue("property_path", propertyPathId);
m_AddReferenceCommand.SetValue("property_type", propertyTypeId);
m_AddReferenceCommand.ExecuteNonQuery();
}

return referencedObjectId;
}

// Resolve a property path/type string to its id, writing the lookup row the first time the
// string is seen. Called within the current transaction (references are being extracted).
/// <summary>
/// Returns the id assigned to <paramref name="propertyPath"/>. The first time an id is
/// seen, its (id, name) row is also written via the property-name command, using the
/// current transaction.
/// </summary>
/// <param name="propertyPath">Property path string to resolve.</param>
/// <returns>The id obtained from the property path id provider.</returns>
private int GetPropertyPathId(string propertyPath)
{
    var pathId = m_PropertyPathIdProvider.GetId(propertyPath);

    // Add() returns false when the id is already tracked, i.e. its lookup row was written earlier.
    var alreadyWritten = !m_PropertyPathSet.Add(pathId);
    if (alreadyWritten)
    {
        return pathId;
    }

    m_AddPropertyNameCommand.SetTransaction(m_CurrentTransaction);
    m_AddPropertyNameCommand.SetValue("id", pathId);
    m_AddPropertyNameCommand.SetValue("name", propertyPath);
    m_AddPropertyNameCommand.ExecuteNonQuery();

    return pathId;
}

/// <summary>
/// Returns the id assigned to <paramref name="propertyType"/>. The first time an id is
/// seen, its (id, name) row is also written via the property-type command, using the
/// current transaction.
/// </summary>
/// <param name="propertyType">Property type string to resolve.</param>
/// <returns>The id obtained from the property type id provider.</returns>
private int GetPropertyTypeId(string propertyType)
{
    var typeId = m_PropertyTypeIdProvider.GetId(propertyType);

    // Add() returns false when the id is already tracked, i.e. its lookup row was written earlier.
    var alreadyWritten = !m_PropertyTypeSet.Add(typeId);
    if (alreadyWritten)
    {
        return typeId;
    }

    m_AddPropertyTypeCommand.SetTransaction(m_CurrentTransaction);
    m_AddPropertyTypeCommand.SetValue("id", typeId);
    m_AddPropertyTypeCommand.SetValue("name", propertyType);
    m_AddPropertyTypeCommand.ExecuteNonQuery();

    return typeId;
}

public void Dispose()
{
foreach (var handler in m_Handlers.Values)
Expand All @@ -311,6 +354,8 @@ public void Dispose()
m_AddAssetBundleCommand.Dispose();
m_AddSerializedFileCommand.Dispose();
m_AddReferenceCommand.Dispose();
m_AddPropertyNameCommand.Dispose();
m_AddPropertyTypeCommand.Dispose();
m_AddObjectCommand.Dispose();
m_AddTypeCommand.Dispose();
m_InsertDepCommand.Dispose();
Expand Down
2 changes: 1 addition & 1 deletion Documentation/analyze-examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ Alternatively, you can write the query manually using the underlying tables:
```
SELECT mb.asset_bundle, mb.serialized_file, mb.name, mb.object_id
FROM object_view mb
INNER JOIN refs r ON mb.id = r.object
INNER JOIN refs_view r ON mb.id = r.object
INNER JOIN monoscript_view ms ON r.referenced_object = ms.id
WHERE mb.type = 'MonoBehaviour'
AND r.property_type = 'MonoScript'
Expand Down
20 changes: 20 additions & 0 deletions Documentation/analyzer.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,26 @@ This view lists all the shaders aggregated by name. The *instances* column indic
the shader was found in the data files. It also provides the total size per shader and the list of
AssetBundles in which they were found.

## refs / refs_view

The `refs` table records the references between objects: for each reference it stores the source
`object`, the `referenced_object`, and the property that holds the reference. On large builds this
table dominates the database size, so the property strings are deduplicated into two lookup tables
and `refs` stores integer ids into them:

* `property_names`: distinct property paths (e.g. `m_Shader`, `m_Materials[0]`).
* `property_types`: distinct referenced types (e.g. `Texture2D`, `MonoScript`).

The `refs_view` view joins `refs` back to both lookup tables, so the property strings are available
directly. Query `refs_view` (columns `object`, `referenced_object`, `property_path`,
`property_type`) rather than joining the lookup tables by hand:

```sql
SELECT * FROM refs_view WHERE property_type = 'MonoScript';
```

These tables are not populated when analyze is run with `--skip-references`.

## BuildReport

See [BuildReport.md](buildreport.md) for details of the tables and views related to analyzing BuildReport files.
Expand Down
2 changes: 1 addition & 1 deletion Documentation/contentlayout.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ContentLayout.json

`ContentLayout.json` describes the content that a content directory build produced. It is written by [`BuildPipeline.BuildContentDirectory`](https://docs.unity3d.com/6000.6/Documentation/ScriptReference/BuildPipeline.BuildContentDirectory.html) into the build report directory, alongside the other build report files. For an overview of the build report directory and the other files it contains, see [Build report and build history](https://docs.unity3d.com/6000.6/Documentation/Manual/build-reporting.html) in the Unity Manual.
`ContentLayout.json` describes the content that a content directory build produced. It is written by [`BuildPipeline.BuildContentDirectory`](https://docs.unity3d.com/6000.6/Documentation/ScriptReference/BuildPipeline.BuildContentDirectory.html) into the build report directory, alongside the other build report files. For an overview of the build report directory and the other files it contains, see [Build report and build history](https://docs.unity3d.com/6000.6/Documentation/Manual/build-history.html) in the Unity Manual.

This page explains what the file contains conceptually to aid in creation of build-analysis tooling or inspection of content directory build output. The C# types that define the schema are published alongside this documentation in [`ContentLayout.cs`](../UnityDataModels/ContentLayout.cs), which is the authoritative reference for the individual fields.

Expand Down
33 changes: 31 additions & 2 deletions ReferenceFinder/ReferenceFinderTool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ public ReferenceTreeNode(long id)

public class ReferenceFinderTool
{
// Minimum analyze database schema version find-refs can read. The normalized refs table
// (issue #44) is version 1; databases produced before schema versioning report 0.
const long RequiredSchemaVersion = 1;

SqliteCommand m_GetRefsCommand;
SqliteCommand m_GetObjectCommand;
List<ReferenceTreeNode> m_Roots = new List<ReferenceTreeNode>();
Expand Down Expand Up @@ -106,6 +110,19 @@ static SqliteConnection OpenDatabase(string databasePath)
}.ConnectionString;
var db = new SqliteConnection(connectionString);
db.Open();

using (var versionCmd = db.CreateCommand())
{
versionCmd.CommandText = "PRAGMA user_version";
var version = (long)versionCmd.ExecuteScalar();
if (version < RequiredSchemaVersion)
{
Console.WriteLine("The provided database uses an unsupported schema version. Re-run 'analyze' on the Unity content to regenerate it.");
db.Dispose();
return null;
}
}

return db;
}
catch (Exception e)
Expand All @@ -120,9 +137,20 @@ int FindReferences(SqliteConnection db, string outputFile, IList<long> objectIds
m_Writer = toStdout ? Console.Out : new StreamWriter(outputFile);

m_GetRefsCommand = db.CreateCommand();
m_GetRefsCommand.CommandText = @"SELECT object, property_path, EXISTS (SELECT * FROM assets a WHERE a.object = r.object) FROM refs r WHERE referenced_object = @id";
m_GetRefsCommand.CommandText = @"SELECT object, property_path, EXISTS (SELECT * FROM assets a WHERE a.object = r.object) FROM refs_view r WHERE referenced_object = @id";
m_GetRefsCommand.Parameters.Add("@id", SqliteType.Integer);
Comment on lines 139 to 141

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed at the source in 2cc7e92 rather than with IFNULL: refs_view now uses INNER JOIN, so property_path can never be NULL (every refs row's ids are always populated and their lookup rows written in the same transaction). That makes GetString(1) here unconditionally safe. I avoided IFNULL deliberately — it would silently return '' for a genuinely corrupt/orphaned row instead of failing loudly, which would be worse for a read-only analysis DB.


// Resolve the 'm_Script' property path to its id once so the per-object script lookup below
// filters on the indexed integer column instead of scanning the property_names table.
long scriptPathId = -1;
using (var scriptPathCmd = db.CreateCommand())
{
scriptPathCmd.CommandText = "SELECT id FROM property_names WHERE name = 'm_Script'";
var result = scriptPathCmd.ExecuteScalar();
if (result != null)
scriptPathId = (long)result;
}

m_GetObjectCommand = db.CreateCommand();
m_GetObjectCommand.CommandText =
@"SELECT o.type, IFNULL(o.name, '') name,
Expand All @@ -134,12 +162,13 @@ FROM objects go
IIF (o.type = 'MonoBehaviour',
(SELECT s.name FROM objects s
LEFT JOIN refs r
ON r.referenced_object = s.id AND r.property_path = 'm_Script'
ON r.referenced_object = s.id AND r.property_path = @scriptPathId
WHERE r.object = o.id),
'') script
FROM object_view o
WHERE o.id = @id";
m_GetObjectCommand.Parameters.Add("@id", SqliteType.Integer);
m_GetObjectCommand.Parameters.AddWithValue("@scriptPathId", scriptPathId);

for (int i = 0; i < objectIds.Count; ++i)
{
Expand Down
4 changes: 2 additions & 2 deletions UnityDataTool.Tests/BuildReportTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ public async Task Analyze_BuildReport_ContainsExpectedReferences(
"No object should reference the BuildReport object");

var refsWithWrongPath = SQLTestHelper.QueryInt(db,
"SELECT COUNT(*) FROM refs WHERE property_path NOT LIKE 'm_Appendices[%]'");
"SELECT COUNT(*) FROM refs_view WHERE property_path NOT LIKE 'm_Appendices[%]'");
Assert.AreEqual(0, refsWithWrongPath, "All property_path values should match pattern 'm_Appendices[N]'");

SQLTestHelper.AssertQueryString(db, "SELECT DISTINCT property_type FROM refs", "Object",
SQLTestHelper.AssertQueryString(db, "SELECT DISTINCT property_type FROM refs_view", "Object",
"All references should have property_type 'Object'");

var objectsNotReferenced = SQLTestHelper.QueryInt(db,
Expand Down
Loading
Loading