diff --git a/Analyzer/Resources/Init.sql b/Analyzer/Resources/Init.sql index 6f384b5..963ae24 100644 --- a/Analyzer/Resources/Init.sql +++ b/Analyzer/Resources/Init.sql @@ -34,14 +34,38 @@ CREATE TABLE IF NOT EXISTS objects PRIMARY KEY (id) ); +-- Deduplicated lookup tables for the strings referenced by the refs table. +-- refs stores ids into these instead of repeating the strings on every row. +CREATE TABLE IF NOT EXISTS property_names +( + id INTEGER PRIMARY KEY, + name TEXT +); + +CREATE TABLE IF NOT EXISTS property_types +( + id INTEGER PRIMARY KEY, + name TEXT +); + CREATE TABLE IF NOT EXISTS refs ( object INTEGER, referenced_object INTEGER, - property_path TEXT, - property_type TEXT + property_path INTEGER, + property_type INTEGER ); +-- Reproduces the pre-normalization refs shape (property_path/property_type as text) +-- so queries can read the strings without joining the lookup tables by hand. +-- INNER JOIN: every refs row is written with both ids present and their lookup rows +-- inserted in the same transaction, so the joins always match (the ids act as foreign keys; the writer, not a declared FOREIGN KEY constraint, guarantees this). 
+CREATE VIEW refs_view AS +SELECT r.object, r.referenced_object, pn.name AS property_path, pt.name AS property_type +FROM refs r +INNER JOIN property_names pn ON r.property_path = pn.id +INNER JOIN property_types pt ON r.property_type = pt.id; + CREATE VIEW object_view AS SELECT o.id, o.object_id, ab.name AS asset_bundle, sf.name AS serialized_file, t.name AS type, o.name, o.game_object, o.size, CASE @@ -89,19 +113,24 @@ ORDER BY size DESC, instances DESC; CREATE VIEW view_material_shader_refs AS SELECT m.id material_id, m.name material_name, a.name material_path, m.asset_bundle material_asset_bundle, s.id shader_id, s.name shader_name, s.asset_bundle shader_asset_bundle FROM object_view m -INNER JOIN refs r ON m.id = r.object AND r.property_path = 'm_Shader' +INNER JOIN refs_view r ON m.id = r.object AND r.property_path = 'm_Shader' INNER JOIN object_view s ON r.referenced_object = s.id LEFT JOIN assets a ON m.id = a.object; CREATE VIEW view_material_texture_refs AS SELECT m.id material_id, m.name material_name, a.name material_path, m.asset_bundle material_asset_bundle, t.id texture_id, t.name texture_name, t.asset_bundle texture_asset_bundle FROM object_view m -INNER JOIN refs r ON r.object = m.id AND property_type = "Texture" +INNER JOIN refs_view r ON r.object = m.id AND property_type = 'Texture' INNER JOIN object_view t ON r.referenced_object = t.id LEFT JOIN assets a ON m.id = a.object -WHERE m.type = "Material"; +WHERE m.type = 'Material'; INSERT INTO types (id, name) VALUES (-1, 'Scene'); +-- Database schema version. Bump when the schema changes in a way that tools relying on it +-- (e.g. find-refs) cannot read from an older database. 1 = normalized refs table (issue #44); +-- databases produced before versioning report 0. 
+PRAGMA user_version = 1; + PRAGMA synchronous = OFF; PRAGMA journal_mode = MEMORY; diff --git a/Analyzer/Resources/MonoScript.sql b/Analyzer/Resources/MonoScript.sql index 3b0a36b..c2f51eb 100644 --- a/Analyzer/Resources/MonoScript.sql +++ b/Analyzer/Resources/MonoScript.sql @@ -29,6 +29,6 @@ SELECT mb.name, mb.size FROM object_view mb -INNER JOIN refs r ON mb.id = r.object +INNER JOIN refs_view r ON mb.id = r.object INNER JOIN monoscript_view ms ON r.referenced_object = ms.id WHERE mb.type = 'MonoBehaviour' AND r.property_type = 'MonoScript'; diff --git a/Analyzer/SQLite/Commands/SerializedFile/AddPropertyName.cs b/Analyzer/SQLite/Commands/SerializedFile/AddPropertyName.cs new file mode 100644 index 0000000..8838113 --- /dev/null +++ b/Analyzer/SQLite/Commands/SerializedFile/AddPropertyName.cs @@ -0,0 +1,27 @@ +using System.Collections.Generic; +using Microsoft.Data.Sqlite; +using UnityDataTools.Analyzer.SQLite.Commands; + +namespace UnityDataTools.Analyzer.SQLite.Commands.SerializedFile +{ + /* TABLE DEFINITION: + create table property_names + ( + id INTEGER, + name TEXT, + PRIMARY KEY (id) + ); + */ + internal class AddPropertyName : AbstractCommand + { + protected override string TableName => "property_names"; + + protected override string DDLSource => null; + + protected override Dictionary<string, SqliteType> Fields => new() + { + { "id", SqliteType.Integer }, + { "name", SqliteType.Text } + }; + } +} diff --git a/Analyzer/SQLite/Commands/SerializedFile/AddPropertyType.cs b/Analyzer/SQLite/Commands/SerializedFile/AddPropertyType.cs new file mode 100644 index 0000000..04119c7 --- /dev/null +++ b/Analyzer/SQLite/Commands/SerializedFile/AddPropertyType.cs @@ -0,0 +1,27 @@ +using System.Collections.Generic; +using Microsoft.Data.Sqlite; +using UnityDataTools.Analyzer.SQLite.Commands; + +namespace UnityDataTools.Analyzer.SQLite.Commands.SerializedFile +{ + /* TABLE DEFINITION: + create table property_types + ( + id INTEGER, + name TEXT, + PRIMARY KEY (id) + ); + */ + internal class 
AddPropertyType : AbstractCommand + { + protected override string TableName => "property_types"; + + protected override string DDLSource => null; + + protected override Dictionary<string, SqliteType> Fields => new() + { + { "id", SqliteType.Integer }, + { "name", SqliteType.Text } + }; + } +} diff --git a/Analyzer/SQLite/Commands/SerializedFile/AddReference.cs b/Analyzer/SQLite/Commands/SerializedFile/AddReference.cs index 8352129..94f1bd3 100644 --- a/Analyzer/SQLite/Commands/SerializedFile/AddReference.cs +++ b/Analyzer/SQLite/Commands/SerializedFile/AddReference.cs @@ -8,10 +8,9 @@ namespace UnityDataTools.Analyzer.SQLite.Commands.SerializedFile create table refs ( object INTEGER, - referenced_object INTEGER, - property_path TEXT, - property_type TEXT, - PRIMARY KEY (object, referenced_object, property_path) + referenced_object INTEGER, + property_path INTEGER, -- id into property_names + property_type INTEGER -- id into property_types ); */ internal class AddReference : AbstractCommand @@ -24,8 +23,8 @@ internal class AddReference : AbstractCommand { { "object", SqliteType.Integer }, { "referenced_object", SqliteType.Integer }, - { "property_path", SqliteType.Text }, - { "property_type", SqliteType.Text } + { "property_path", SqliteType.Integer }, + { "property_type", SqliteType.Integer } }; } } diff --git a/Analyzer/SQLite/Writers/SerializedFileSQLiteWriter.cs b/Analyzer/SQLite/Writers/SerializedFileSQLiteWriter.cs index 0496b0c..8fedaa9 100644 --- a/Analyzer/SQLite/Writers/SerializedFileSQLiteWriter.cs +++ b/Analyzer/SQLite/Writers/SerializedFileSQLiteWriter.cs @@ -25,6 +25,14 @@ public class SerializedFileSQLiteWriter : IDisposable private IdProvider m_SerializedFileIdProvider = new(); private ObjectIdProvider m_ObjectIdProvider = new(); + // The refs table stores ids into these deduplicated string tables instead of repeating the + // property path/type strings on every row. 
Ids are assigned lazily and are global across all + // files; the HashSets track which ids have already had their lookup row written. + private IdProvider m_PropertyPathIdProvider = new(); + private IdProvider m_PropertyTypeIdProvider = new(); + private HashSet<int> m_PropertyPathSet = new(); + private HashSet<int> m_PropertyTypeSet = new(); + private Regex m_RegexSceneFile = new(@"BuildPlayer-([^\.]+)(?:\.sharedAssets)?"); // Used to map PPtr fileId to its corresponding serialized file id in the database. @@ -46,6 +54,8 @@ public class SerializedFileSQLiteWriter : IDisposable // serialized files private AddReference m_AddReferenceCommand = new AddReference(); + private AddPropertyName m_AddPropertyNameCommand = new AddPropertyName(); + private AddPropertyType m_AddPropertyTypeCommand = new AddPropertyType(); private AddAssetBundle m_AddAssetBundleCommand = new AddAssetBundle(); private AddSerializedFile m_AddSerializedFileCommand = new AddSerializedFile(); private AddObject m_AddObjectCommand = new AddObject(); @@ -82,6 +92,8 @@ private void CreateSQLiteCommands() // build serialized file commands m_AddReferenceCommand.CreateCommand(m_Database); + m_AddPropertyNameCommand.CreateCommand(m_Database); + m_AddPropertyTypeCommand.CreateCommand(m_Database); m_AddAssetBundleCommand.CreateCommand(m_Database); m_AddSerializedFileCommand.CreateCommand(m_Database); m_AddObjectCommand.CreateCommand(m_Database); @@ -289,17 +301,48 @@ private int AddReference(long objectId, int fileId, long pathId, string property if (!m_SkipReferences) { + var propertyPathId = GetPropertyPathId(propertyPath); + var propertyTypeId = GetPropertyTypeId(propertyType); + m_AddReferenceCommand.SetTransaction(m_CurrentTransaction); m_AddReferenceCommand.SetValue("object", objectId); m_AddReferenceCommand.SetValue("referenced_object", referencedObjectId); - m_AddReferenceCommand.SetValue("property_path", propertyPath); - m_AddReferenceCommand.SetValue("property_type", propertyType); + 
m_AddReferenceCommand.SetValue("property_path", propertyPathId); + m_AddReferenceCommand.SetValue("property_type", propertyTypeId); m_AddReferenceCommand.ExecuteNonQuery(); } return referencedObjectId; } + // Resolve a property path/type string to its id, writing the lookup row the first time the + // string is seen. Called within the current transaction (references are being extracted). + private int GetPropertyPathId(string propertyPath) + { + var id = m_PropertyPathIdProvider.GetId(propertyPath); + if (m_PropertyPathSet.Add(id)) + { + m_AddPropertyNameCommand.SetTransaction(m_CurrentTransaction); + m_AddPropertyNameCommand.SetValue("id", id); + m_AddPropertyNameCommand.SetValue("name", propertyPath); + m_AddPropertyNameCommand.ExecuteNonQuery(); + } + return id; + } + + private int GetPropertyTypeId(string propertyType) + { + var id = m_PropertyTypeIdProvider.GetId(propertyType); + if (m_PropertyTypeSet.Add(id)) + { + m_AddPropertyTypeCommand.SetTransaction(m_CurrentTransaction); + m_AddPropertyTypeCommand.SetValue("id", id); + m_AddPropertyTypeCommand.SetValue("name", propertyType); + m_AddPropertyTypeCommand.ExecuteNonQuery(); + } + return id; + } + public void Dispose() { foreach (var handler in m_Handlers.Values) @@ -311,6 +354,8 @@ public void Dispose() m_AddAssetBundleCommand.Dispose(); m_AddSerializedFileCommand.Dispose(); m_AddReferenceCommand.Dispose(); + m_AddPropertyNameCommand.Dispose(); + m_AddPropertyTypeCommand.Dispose(); m_AddObjectCommand.Dispose(); m_AddTypeCommand.Dispose(); m_InsertDepCommand.Dispose(); diff --git a/Documentation/analyze-examples.md b/Documentation/analyze-examples.md index 6aaf81f..66787a6 100644 --- a/Documentation/analyze-examples.md +++ b/Documentation/analyze-examples.md @@ -152,7 +152,7 @@ Alternatively, you can write the query manually using the underlying tables: ``` SELECT mb.asset_bundle, mb.serialized_file, mb.name, mb.object_id FROM object_view mb -INNER JOIN refs r ON mb.id = r.object +INNER JOIN refs_view r ON 
mb.id = r.object INNER JOIN monoscript_view ms ON r.referenced_object = ms.id WHERE mb.type = 'MonoBehaviour' AND r.property_type = 'MonoScript' diff --git a/Documentation/analyzer.md b/Documentation/analyzer.md index be76661..62297b6 100644 --- a/Documentation/analyzer.md +++ b/Documentation/analyzer.md @@ -176,6 +176,26 @@ This view lists all the shaders aggregated by name. The *instances* column indic the shader was found in the data files. It also provides the total size per shader and the list of AssetBundles in which they were found. +## refs / refs_view + +The `refs` table records the references between objects: for each reference it stores the source +`object`, the `referenced_object`, and the property that holds the reference. On large builds this +table dominates the database size, so the property strings are deduplicated into two lookup tables +and `refs` stores integer ids into them: + +* `property_names`: distinct property paths (e.g. `m_Shader`, `m_Materials[0]`). +* `property_types`: distinct referenced types (e.g. `Texture2D`, `MonoScript`). + +The `refs_view` rejoins these so the original strings are available directly. Query `refs_view` +(columns `object`, `referenced_object`, `property_path`, `property_type`) rather than joining the +lookup tables by hand: + +```sql +SELECT * FROM refs_view WHERE property_type = 'MonoScript'; +``` + +These tables are not populated when analyze is run with `--skip-references`. + ## BuildReport See [BuildReport.md](buildreport.md) for details of the tables and views related to analyzing BuildReport files. diff --git a/Documentation/contentlayout.md b/Documentation/contentlayout.md index 3e9a5b7..74f4f76 100644 --- a/Documentation/contentlayout.md +++ b/Documentation/contentlayout.md @@ -1,6 +1,6 @@ # ContentLayout.json -`ContentLayout.json` describes the content that a content directory build produced. 
It is written by [`BuildPipeline.BuildContentDirectory`](https://docs.unity3d.com/6000.6/Documentation/ScriptReference/BuildPipeline.BuildContentDirectory.html) into the build report directory, alongside the other build report files. For an overview of the build report directory and the other files it contains, see [Build report and build history](https://docs.unity3d.com/6000.6/Documentation/Manual/build-reporting.html) in the Unity Manual. +`ContentLayout.json` describes the content that a content directory build produced. It is written by [`BuildPipeline.BuildContentDirectory`](https://docs.unity3d.com/6000.6/Documentation/ScriptReference/BuildPipeline.BuildContentDirectory.html) into the build report directory, alongside the other build report files. For an overview of the build report directory and the other files it contains, see [Build report and build history](https://docs.unity3d.com/6000.6/Documentation/Manual/build-history.html) in the Unity Manual. This page explains what the file contains conceptually to aid in creation of build-analysis tooling or inspection of content directory build output. The C# types that define the schema are published alongside this documentation in [`ContentLayout.cs`](../UnityDataModels/ContentLayout.cs), which is the authoritative reference for the individual fields. diff --git a/ReferenceFinder/ReferenceFinderTool.cs b/ReferenceFinder/ReferenceFinderTool.cs index 104c3bf..e5e65ea 100644 --- a/ReferenceFinder/ReferenceFinderTool.cs +++ b/ReferenceFinder/ReferenceFinderTool.cs @@ -19,6 +19,10 @@ public ReferenceTreeNode(long id) public class ReferenceFinderTool { + // Minimum analyze database schema version find-refs can read. The normalized refs table + // (issue #44) is version 1; databases produced before schema versioning report 0. 
+ const long RequiredSchemaVersion = 1; + SqliteCommand m_GetRefsCommand; SqliteCommand m_GetObjectCommand; List m_Roots = new List(); @@ -106,6 +110,19 @@ static SqliteConnection OpenDatabase(string databasePath) }.ConnectionString; var db = new SqliteConnection(connectionString); db.Open(); + + using (var versionCmd = db.CreateCommand()) + { + versionCmd.CommandText = "PRAGMA user_version"; + var version = (long)versionCmd.ExecuteScalar(); + if (version < RequiredSchemaVersion) + { + Console.WriteLine("The provided database uses an unsupported schema version. Re-run 'analyze' on the Unity content to regenerate it."); + db.Dispose(); + return null; + } + } + return db; } catch (Exception e) @@ -120,9 +137,20 @@ int FindReferences(SqliteConnection db, string outputFile, IList objectIds m_Writer = toStdout ? Console.Out : new StreamWriter(outputFile); m_GetRefsCommand = db.CreateCommand(); - m_GetRefsCommand.CommandText = @"SELECT object, property_path, EXISTS (SELECT * FROM assets a WHERE a.object = r.object) FROM refs r WHERE referenced_object = @id"; + m_GetRefsCommand.CommandText = @"SELECT object, property_path, EXISTS (SELECT * FROM assets a WHERE a.object = r.object) FROM refs_view r WHERE referenced_object = @id"; m_GetRefsCommand.Parameters.Add("@id", SqliteType.Integer); + // Resolve the 'm_Script' property path to its id once so the per-object script lookup below + // filters on the indexed integer column instead of scanning the property_names table. 
+ long scriptPathId = -1; + using (var scriptPathCmd = db.CreateCommand()) + { + scriptPathCmd.CommandText = "SELECT id FROM property_names WHERE name = 'm_Script'"; + var result = scriptPathCmd.ExecuteScalar(); + if (result != null) + scriptPathId = (long)result; + } + m_GetObjectCommand = db.CreateCommand(); m_GetObjectCommand.CommandText = @"SELECT o.type, IFNULL(o.name, '') name, @@ -134,12 +162,13 @@ FROM objects go IIF (o.type = 'MonoBehaviour', (SELECT s.name FROM objects s LEFT JOIN refs r - ON r.referenced_object = s.id AND r.property_path = 'm_Script' + ON r.referenced_object = s.id AND r.property_path = @scriptPathId WHERE r.object = o.id), '') script FROM object_view o WHERE o.id = @id"; m_GetObjectCommand.Parameters.Add("@id", SqliteType.Integer); + m_GetObjectCommand.Parameters.AddWithValue("@scriptPathId", scriptPathId); for (int i = 0; i < objectIds.Count; ++i) { diff --git a/UnityDataTool.Tests/BuildReportTests.cs b/UnityDataTool.Tests/BuildReportTests.cs index 9cd5bd2..8f1549e 100644 --- a/UnityDataTool.Tests/BuildReportTests.cs +++ b/UnityDataTool.Tests/BuildReportTests.cs @@ -190,10 +190,10 @@ public async Task Analyze_BuildReport_ContainsExpectedReferences( "No object should reference the BuildReport object"); var refsWithWrongPath = SQLTestHelper.QueryInt(db, - "SELECT COUNT(*) FROM refs WHERE property_path NOT LIKE 'm_Appendices[%]'"); + "SELECT COUNT(*) FROM refs_view WHERE property_path NOT LIKE 'm_Appendices[%]'"); Assert.AreEqual(0, refsWithWrongPath, "All property_path values should match pattern 'm_Appendices[N]'"); - SQLTestHelper.AssertQueryString(db, "SELECT DISTINCT property_type FROM refs", "Object", + SQLTestHelper.AssertQueryString(db, "SELECT DISTINCT property_type FROM refs_view", "Object", "All references should have property_type 'Object'"); var objectsNotReferenced = SQLTestHelper.QueryInt(db, diff --git a/UnityDataTool.Tests/FindRefsTests.cs b/UnityDataTool.Tests/FindRefsTests.cs index efc6b70..500e8d5 100644 --- 
a/UnityDataTool.Tests/FindRefsTests.cs +++ b/UnityDataTool.Tests/FindRefsTests.cs @@ -304,6 +304,64 @@ SELECT COUNT(*) FROM refs Assert.AreEqual(2, count, "DirectAudioClipReference should reference both AudioClips"); } + // The refs table stores ids into property_names/property_types; refs_view rejoins them to expose the + // original strings. Verify a known MonoBehaviour -> MonoScript reference surfaces correctly through the view. + [Test] + public void RefsView_ExposesPropertyPathAndTypeStrings() + { + using var db = SQLTestHelper.OpenDatabase(m_DatabasePath); + + var monoScriptRefs = SQLTestHelper.QueryInt(db, @" + SELECT COUNT(*) FROM refs_view + WHERE property_type = 'MonoScript' AND property_path = 'm_Script' + AND object IN (SELECT id FROM object_view WHERE type = 'MonoBehaviour')"); + Assert.Greater(monoScriptRefs, 0, + "MonoBehaviours should have an m_Script reference of type MonoScript visible through refs_view"); + } + + // Every id stored in refs must resolve through the lookup tables, and the lookup tables must not be larger + // than the set of strings actually used (dedup should collapse repeats to one row each). 
+ [Test] + public void RefsLookupTables_AreConsistentWithRefs() + { + using var db = SQLTestHelper.OpenDatabase(m_DatabasePath); + + SQLTestHelper.AssertQueryInt(db, + "SELECT COUNT(*) FROM refs r WHERE NOT EXISTS (SELECT 1 FROM property_names pn WHERE pn.id = r.property_path)", 0, + "Every refs.property_path id must exist in property_names"); + SQLTestHelper.AssertQueryInt(db, + "SELECT COUNT(*) FROM refs r WHERE NOT EXISTS (SELECT 1 FROM property_types pt WHERE pt.id = r.property_type)", 0, + "Every refs.property_type id must exist in property_types"); + + SQLTestHelper.AssertQueryInt(db, + "SELECT (SELECT COUNT(DISTINCT property_path) FROM refs_view) - (SELECT COUNT(*) FROM property_names)", 0, + "property_names should contain exactly the distinct property paths used by refs"); + SQLTestHelper.AssertQueryInt(db, + "SELECT (SELECT COUNT(DISTINCT property_type) FROM refs_view) - (SELECT COUNT(*) FROM property_types)", 0, + "property_types should contain exactly the distinct property types used by refs"); + } + + // find-refs must reject databases created before the normalized refs schema (user_version 0) with a clear + // message rather than an obscure SQL error. 
+ [Test] + public async Task FindRefs_UnsupportedSchemaVersion_FailsCleanly() + { + var oldSchemaDb = Path.Combine(m_WorkFolder, "old_schema.db"); + File.Copy(m_DatabasePath, oldSchemaDb, true); + using (var db = SQLTestHelper.OpenDatabase(oldSchemaDb)) + { + using var cmd = db.CreateCommand(); + cmd.CommandText = "PRAGMA user_version = 0"; + cmd.ExecuteNonQuery(); + } + SqliteConnection.ClearAllPools(); + + var (exitCode, output) = await RunFindRefsOn(oldSchemaDb, new[] { "-n", "a", "-t", "AudioClip" }); + + Assert.AreNotEqual(0, exitCode); + Assert.That(output, Does.Contain("unsupported schema version")); + } + private static long QueryLong(SqliteConnection db, string sql) { using var cmd = db.CreateCommand(); diff --git a/UnityDataTool/UnityDataTool.csproj b/UnityDataTool/UnityDataTool.csproj index e4698b2..8426aa9 100644 --- a/UnityDataTool/UnityDataTool.csproj +++ b/UnityDataTool/UnityDataTool.csproj @@ -4,10 +4,10 @@ Exe net9.0 latest - 1.3.6 - 1.3.6.0 - 1.3.6.0 - 1.3.6 + 2.0.0 + 2.0.0.0 + 2.0.0.0 + 2.0.0