diff --git a/Cargo.toml b/Cargo.toml index 2d3e73af..0d340184 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,9 @@ members = [ "kvcache", "delta", "mercury", + "jupiter", + "jupiter/entity", + "venus", ] exclude = ["mda", "craft", "fuse"] @@ -74,16 +77,12 @@ russh = "0.42.0" russh-keys = "0.42.0" axum = "0.7.4" hex = "0.4.3" -sea-orm = { version = "0.12.14", features = [ - "sqlx-postgres", - "sqlx-mysql", - "runtime-tokio-rustls", - "macros", -] } +sea-orm = "0.12.14" redis = "0.24.0" flate2 = "1.0.28" bstr = "1.9.0" colored = "2.1.0" +idgenerator = "2.0.0" [build-dependencies] shadow-rs = "0.26.0" diff --git a/b_link.txt b/b_link.txt new file mode 100644 index 00000000..8b687286 --- /dev/null +++ b/b_link.txt @@ -0,0 +1,4 @@ +version https://gitmega.dev/spec/v1 +{{objectType}} {{sha1}} +storage_type {{type}} +storage_locaton {{location}} \ No newline at end of file diff --git a/common/Cargo.toml b/common/Cargo.toml index 02624c50..11772fa4 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -14,3 +14,4 @@ anyhow = { workspace = true } sea-orm = { workspace = true } thiserror = { workspace = true } clap = { workspace = true, features = ["derive"] } +idgenerator = { workspace = true } diff --git a/common/src/utils.rs b/common/src/utils.rs index 3800f86a..e830af31 100644 --- a/common/src/utils.rs +++ b/common/src/utils.rs @@ -2,7 +2,20 @@ //! //! +use idgenerator::IdInstance; + pub const ZERO_ID: &str = match std::str::from_utf8(&[b'0'; 40]) { Ok(s) => s, Err(_) => panic!("can't get ZERO_ID"), -}; \ No newline at end of file +}; + +pub fn generate_id() -> i64 { + let mut new_id: i64 = 0; + let mut times = 100; + while times > 0 { + // Call `next_id` to generate a new unique id. 
+ new_id = IdInstance::next_id(); + times -= 1; + } + new_id +} diff --git a/docs/database.md b/docs/database.md index 9ba16ce9..3d78a655 100644 --- a/docs/database.md +++ b/docs/database.md @@ -15,8 +15,10 @@ Similar to the 'tree' in Git, Mega maintains relationships between files and fil The purpose of the B-link file is to store file index information, serving as a replacement for blobs in Git. The design of this structure is inspired by the specification of Git LFS, as follows: ```bash - version https://mega.com/directory/spec/v1 + version https://gitmega.dev/spec/v1 blob 3a739f77180d81aa45d9bd11eb6be7098bf1991f + storage_type local_fs + storage_location /tmp/.mega/{{reponame}}/.objects/3a/73/9f77180d81aa45d9bd11eb6be7098bf1991f ``` It includes the following records: @@ -58,46 +60,42 @@ It includes the following records: ### ER Diagram - ```mermaid +```mermaid erDiagram - msnap["MEGA-SNAPSHOT"] mc["MEGA-COMMITS"] mt["MEGA-TREE"] mb["MEGA-BLOB"] mtag["MEGA-TAG"] mmr["MEGA-MR"] - grp["GIT-REPO"] grf["GIT-REFS"] gc["GIT-COMMIT"] gt["GIT-TREE"] gb["GIT-BLOB"] gtag["GIT-TAG"] gp["GIT-PR"] gi["GIT-ISSUE"] - raw["RAW-OBJETCS"] - lo["LFS-OBJECTS"] lk["LFS-LOCKS"] - - msnap |o--|{ mc : "belong to" - msnap |o--|{ gt : contains - mc ||--|| mt : points - mc ||--|| raw : points - mc ||--|| mmr : "belong to" - mt }|--o{ mb : points - mt ||--|| raw : points - mt }|--|| mmr : "belong to" - mt }o..o{ gt : points - mb ||--|| raw : points - mb }|--|| mmr : "belong to" - mtag |o--o| mc : points - mtag ||--|| raw : points - raw ||--o| lo : points - lo ||--o| lk : points - gp }o--|| grp : "belong to" - gi }o--|| grp : "belong to" - grf ||--|| gc : points - grf ||--|| gtag : points - grf }|--|| grp : "belong to" - gc ||--|| gt : has - gc ||--|| raw : has - gc }|--|| grp : "belong to" - gt ||--o{ gb : has - gt ||--|| raw : points - gt }|--|| grp : "belong to" - gb ||--|| raw : points - gb }|--|| grp : "belong to" - gtag }o--|| grp : "belong to" - gtag |o--o| gc : points - gtag ||--|| 
raw : points - - ``` + + MEGA-SNAPSHOT |o--|{ MEGA-COMMITS : "belong to" + MEGA-SNAPSHOT |o--|{ GIT-TREE : contains + MEGA-COMMITS ||--|| MEGA-TREE : points + MEGA-COMMITS ||--|| RAW-OBJETCS : points + MEGA-COMMITS ||--|| MEGA-MR : "belong to" + MEGA-TREE }|--o{ MEGA-BLOB : points + MEGA-TREE ||--|| RAW-OBJETCS : points + MEGA-TREE }|--|| MEGA-MR : "belong to" + MEGA-TREE }o..o{ GIT-TREE : points + MEGA-BLOB ||--|| RAW-OBJETCS : points + MEGA-BLOB }|--|| MEGA-MR : "belong to" + MEGA-TAG |o--o| MEGA-COMMITS : points + MEGA-TAG ||--|| RAW-OBJETCS : points + RAW-OBJETCS ||--o| LFS-OBJECTS : points + LFS-OBJECTS ||--o| LFS-LOCKS : points + GIT-PR }o--|| GIT-REPO : "belong to" + GIT-ISSUE }o--|| GIT-REPO : "belong to" + GIT-REFS ||--|| GIT-COMMIT : points + GIT-REFS ||--|| GIT-TAG : points + GIT-REFS }|--|| GIT-REPO : "belong to" + GIT-COMMIT ||--|| GIT-TREE : has + GIT-COMMIT ||--|| RAW-OBJETCS : has + GIT-COMMIT }|--|| GIT-REPO : "belong to" + GIT-TREE ||--o{ GIT-BLOB : has + GIT-TREE ||--|| RAW-OBJETCS : points + GIT-TREE }|--|| GIT-REPO : "belong to" + GIT-BLOB ||--|| RAW-OBJETCS : points + GIT-BLOB }|--|| GIT-REPO : "belong to" + GIT-TAG }o--|| GIT-REPO : "belong to" + GIT-TAG |o--o| GIT-COMMIT : points + GIT-TAG ||--|| RAW-OBJETCS : points + +``` ### Table Details @@ -119,21 +117,21 @@ erDiagram #### mega_commit -| Column | Type | Constraints | Description | -| ---------- | ----------- | ----------- | ----------------------------------------------- | -| id | BIGINT | PRIMARY KEY | | -| commit_id | VARCHAR(40) | NOT NULL | | -| tree | VARCHAR(40) | NOT NULL | | -| parents_id | TEXT[] | | | -| author | TEXT | | | -| committer | TEXT | | | -| content | TEXT | | | -| mr_id | VARCHAR(20) | | | -| status | VARCHAR(20) | NOT NULL | mr satus, might be 'Open','Merged' and 'Closed' | -| size | INT | NOT NULL | used for magic sort in pack process | -| full_path | TEXT | NOT NULL | used for magic sort in pack process | -| created_at | TIMESTAMP | NOT NULL | | -| updated_at | 
TIMESTAMP | NOT NULL | | +| Column | Type | Constraints | Description | +| ---------- | ----------- | ----------- | ---------------------------------------------- | +| id | BIGINT | PRIMARY KEY | | +| commit_id | VARCHAR(40) | NOT NULL | | +| tree | VARCHAR(40) | NOT NULL | | +| parents_id | TEXT[] | NOT NULL | | +| author | TEXT | | | +| committer | TEXT | | | +| content | TEXT | | | +| mr_id | VARCHAR(20) | | | +| status | VARCHAR(20) | NOT NULL | mr satus, can be 'Open', 'Merged' and 'Closed' | +| size | INT | NOT NULL | used for magic sort in pack process | +| full_path | TEXT | NOT NULL | used for magic sort in pack process | +| created_at | TIMESTAMP | NOT NULL | | +| updated_at | TIMESTAMP | NOT NULL | | #### mega_tree @@ -188,7 +186,7 @@ erDiagram | id | BIGINT | PRIMARY KEY | | | mr_link | VARCHAR(40) | NOT NULL | A MR identifier with a length of 6-8 characters. | | mr_msg | VARCHAR(255) | NOT NULL | | -| merge_date | TIMESTAMP | NOT NULL | | +| merge_date | TIMESTAMP | | | | status | VARCHAR(20) | NOT NULL | | | created_at | TIMESTAMP | NOT NULL | | | updated_at | TIMESTAMP | NOT NULL | | @@ -215,7 +213,7 @@ erDiagram | repo_id | BIGINT | NOT NULL | | | ref_name | TEXT | NOT NULL | reference name, can be branch and tag | | ref_git_id | VARCHAR(40) | NOT NULL | point to the commit or tag object | -| is_commit | BOOLEAN | NOT NULL | set true if point to a commit | +| ref_type | VARCHAR(20) | NOT NULL | ref_type: can be 'tag' or 'branch' | | created_at | TIMESTAMP | NOT NULL | | | updated_at | TIMESTAMP | NOT NULL | | @@ -237,7 +235,7 @@ erDiagram | repo_id | BIGINT | NOT NULL | | commit_id | VARCHAR(40) | NOT NULL | | tree | VARCHAR(40) | NOT NULL | -| pid | TEXT[] | | +| parents_id | TEXT[] | NOT NULL | | author | TEXT | | | committer | TEXT | | | content | TEXT | | @@ -290,15 +288,15 @@ erDiagram #### raw_objects -| Column | Type | Constraints | Description | -| ------------------ | ----------- | ----------- | 
----------------------------------------------------------------------- | -| id | BIGINT | PRIMARY KEY | | -| sha1 | VARCHAR(40) | NOT NULL | git object's sha1 hash | -| object_type | VARCHAR(20) | NOT NULL | | -| storage_type | INT | NOT NULL | data storage type, can be 0-database; 1-local file system; 2-remote url | -| data | BYTEA | | | -| local_storage_path | TEXT | | | -| remote_url | TEXT | | | +| Column | Type | Constraints | Description | +| ------------------ | ----------- | ----------- | ----------------------------------------------------------------- | +| id | BIGINT | PRIMARY KEY | | +| sha1 | VARCHAR(40) | NOT NULL | git object's sha1 hash | +| object_type | VARCHAR(20) | NOT NULL | | +| storage_type | INT | NOT NULL | data storage type, can be 'database', 'local-fs' and 'remote_url' | +| data | BYTEA | | | +| local_storage_path | TEXT | | | +| remote_url | TEXT | | | #### git_pr @@ -488,4 +486,4 @@ erDiagram - Generating entities: Entities can be generated from the database table structure with the following command -`sea-orm-cli generate entity -u "postgres://${DB_USERNAME}:${DB_SECRET}@${DB_HOST}/mega" -o database/entity/src` \ No newline at end of file +`sea-orm-cli generate entity -u "postgres://postgres:$postgres@localhost/mega_re" -o jupiter/entity/src` \ No newline at end of file diff --git a/jupiter/Cargo.toml b/jupiter/Cargo.toml new file mode 100644 index 00000000..63a42276 --- /dev/null +++ b/jupiter/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "jupiter" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +name = "jupiter" +path = "src/lib.rs" + + +[dependencies] +common = { path = "../common" } +db_entity = { path = "./entity" } +venus = { path = "../venus" } + +sea-orm = { workspace = true, features = [ + "sqlx-postgres", + "sqlx-mysql", + "runtime-tokio-rustls", + "macros", +] } +tracing = { workspace = true } +bytes = { workspace = true } 
+chrono = { workspace = true } +async-trait = { workspace = true } +futures = { workspace = true } +serde_json ={ workspace = true } + +handlebars = "5.1.0" + +[dev-dependencies] +tokio = { workspace = true, features = ["macros"] } diff --git a/jupiter/entity/Cargo.toml b/jupiter/entity/Cargo.toml new file mode 100644 index 00000000..938e8d71 --- /dev/null +++ b/jupiter/entity/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "db_entity" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +name = "db_entity" +path = "src/lib.rs" + +[dependencies] +serde = { workspace = true, features = ["derive"] } +chrono = { workspace = true } +sea-orm = { workspace = true, features = ["sqlx-postgres", "sqlx-mysql"] } diff --git a/jupiter/entity/src/db_enums.rs b/jupiter/entity/src/db_enums.rs new file mode 100644 index 00000000..294347c8 --- /dev/null +++ b/jupiter/entity/src/db_enums.rs @@ -0,0 +1,45 @@ +use sea_orm::{DeriveActiveEnum, EnumIter}; + +#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)] +#[sea_orm(rs_type = "String", db_type = "String(Some(1))")] +pub enum StorageType { + #[sea_orm(string_value = "database")] + Database, + #[sea_orm(string_value = "local_fs")] + LocalFs, + #[sea_orm(string_value = "remote_url")] + RemoteUrl, +} + +impl ToString for StorageType { + fn to_string(&self) -> String { + match self { + StorageType::Database => String::from("database"), + StorageType::LocalFs => String::from("local_fs"), + StorageType::RemoteUrl => String::from("remote_url"), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)] +#[sea_orm(rs_type = "String", db_type = "String(Some(1))")] +pub enum MergeStatus { + #[sea_orm(string_value = "open")] + Open, + #[sea_orm(string_value = "merged")] + Merged, + #[sea_orm(string_value = "closed")] + Closed, +} + + + +#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)] +#[sea_orm(rs_type = "String", db_type = "String(Some(1))")] +pub enum RefType { + 
#[sea_orm(string_value = "branch")] + Branch, + #[sea_orm(string_value = "tag")] + Tag, +} + diff --git a/jupiter/entity/src/git_blob.rs b/jupiter/entity/src/git_blob.rs new file mode 100644 index 00000000..7351e7ef --- /dev/null +++ b/jupiter/entity/src/git_blob.rs @@ -0,0 +1,26 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_blob")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub repo_id: i64, + pub blob_id: String, + pub name: Option, + pub size: i32, + #[sea_orm(column_type = "Text")] + pub full_path: String, + #[sea_orm(column_type = "Text")] + pub content: String, + pub content_type: Option, + pub commit_id: String, + pub created_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/git_commit.rs b/jupiter/entity/src/git_commit.rs new file mode 100644 index 00000000..02170b97 --- /dev/null +++ b/jupiter/entity/src/git_commit.rs @@ -0,0 +1,29 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_commit")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub repo_id: i64, + pub commit_id: String, + pub tree: String, + pub parents_id: Vec, + #[sea_orm(column_type = "Text", nullable)] + pub author: Option, + #[sea_orm(column_type = "Text", nullable)] + pub committer: Option, + #[sea_orm(column_type = "Text", nullable)] + pub content: Option, + pub size: i32, + #[sea_orm(column_type = "Text")] + pub full_path: String, + pub created_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/git_issue.rs b/jupiter/entity/src/git_issue.rs new file mode 100644 index 00000000..4d466264 --- /dev/null +++ b/jupiter/entity/src/git_issue.rs @@ -0,0 +1,24 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_issue")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub number: i64, + pub title: String, + pub sender_name: String, + pub sender_id: i64, + pub state: String, + pub created_at: DateTime, + pub updated_at: DateTime, + pub closed_at: Option, + pub repo_id: i64, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/git_pr.rs b/jupiter/entity/src/git_pr.rs new file mode 100644 index 00000000..f945fe51 --- /dev/null +++ b/jupiter/entity/src/git_pr.rs @@ -0,0 +1,34 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_pr")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub number: i64, + pub title: String, + pub state: String, + pub created_at: DateTime, + pub updated_at: DateTime, + pub closed_at: Option, + pub merged_at: Option, + pub merge_commit_sha: Option, + pub repo_id: i64, + pub sender_name: String, + pub sender_id: i64, + pub user_name: String, + pub user_id: i64, + pub commits_url: String, + pub patch_url: String, + pub head_label: String, + pub head_ref: String, + pub base_label: String, + pub base_ref: String, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/git_refs.rs b/jupiter/entity/src/git_refs.rs new file mode 100644 index 00000000..1da7166b --- /dev/null +++ b/jupiter/entity/src/git_refs.rs @@ -0,0 +1,24 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +use crate::db_enums::RefType; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_refs")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub repo_id: i64, + #[sea_orm(column_type = "Text")] + pub ref_name: String, + pub ref_git_id: String, + pub ref_type: RefType, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/git_repo.rs b/jupiter/entity/src/git_repo.rs new file mode 100644 index 00000000..a43b8376 --- /dev/null +++ b/jupiter/entity/src/git_repo.rs @@ -0,0 +1,19 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_repo")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + #[sea_orm(column_type = "Text", unique)] + pub repo_path: String, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/git_tag.rs b/jupiter/entity/src/git_tag.rs new file mode 100644 index 00000000..c5a0690a --- /dev/null +++ b/jupiter/entity/src/git_tag.rs @@ -0,0 +1,28 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_tag")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub repo_id: i64, + #[sea_orm(unique)] + pub tag_id: String, + pub object_id: String, + pub object_type: Option, + #[sea_orm(column_type = "Text")] + pub tag_name: String, + #[sea_orm(column_type = "Text")] + pub tagger: String, + #[sea_orm(column_type = "Text")] + pub message: String, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/git_tree.rs b/jupiter/entity/src/git_tree.rs new file mode 100644 index 00000000..ab0d4b3d --- /dev/null +++ b/jupiter/entity/src/git_tree.rs @@ -0,0 +1,24 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "git_tree")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub repo_id: i64, + pub tree_id: String, + pub sub_trees: Option>, + pub name: Option, + pub size: i32, + #[sea_orm(column_type = "Text")] + pub full_path: String, + pub commit_id: String, + pub created_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/lfs_locks.rs b/jupiter/entity/src/lfs_locks.rs new file mode 100644 index 00000000..7205a2cb --- /dev/null +++ b/jupiter/entity/src/lfs_locks.rs @@ -0,0 +1,17 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "lfs_locks")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: String, + #[sea_orm(column_type = "Text", nullable)] + pub data: Option, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/lfs_objects.rs b/jupiter/entity/src/lfs_objects.rs new file mode 100644 index 00000000..72b6f7c0 --- /dev/null +++ b/jupiter/entity/src/lfs_objects.rs @@ -0,0 +1,17 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "lfs_objects")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub oid: String, + pub size: Option, + pub exist: Option, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/lib.rs b/jupiter/entity/src/lib.rs new file mode 100644 index 00000000..c99b4e5c --- /dev/null +++ b/jupiter/entity/src/lib.rs @@ -0,0 +1,23 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +pub mod prelude; + +pub mod git_blob; +pub mod git_commit; +pub mod git_issue; +pub mod git_pr; +pub mod git_refs; +pub mod git_repo; +pub mod git_tag; +pub mod git_tree; +pub mod lfs_locks; +pub mod lfs_objects; +pub mod mega_blob; +pub mod mega_commit; +pub mod mega_issue; +pub mod mega_mr; +pub mod mega_snapshot; +pub mod mega_tag; +pub mod mega_tree; +pub mod raw_objects; +pub mod db_enums; diff --git a/jupiter/entity/src/mega_blob.rs b/jupiter/entity/src/mega_blob.rs new file mode 100644 index 00000000..82b62614 --- /dev/null +++ b/jupiter/entity/src/mega_blob.rs @@ -0,0 +1,28 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "mega_blob")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + #[sea_orm(unique)] + pub blob_id: String, + pub commit_id: String, + pub mr_id: Option, + pub status: String, + pub size: i32, + #[sea_orm(column_type = "Text")] + pub full_path: String, + #[sea_orm(column_type = "Text")] + pub content: String, + pub content_type: Option, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/mega_commit.rs b/jupiter/entity/src/mega_commit.rs new file mode 100644 index 00000000..4516e57e --- /dev/null +++ b/jupiter/entity/src/mega_commit.rs @@ -0,0 +1,34 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +use crate::db_enums::MergeStatus; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "mega_commit")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + #[sea_orm(unique)] + pub commit_id: String, + pub tree: String, + pub parents_id: Vec, + #[sea_orm(column_type = "Text", nullable)] + pub author: Option, + #[sea_orm(column_type = "Text", nullable)] + pub committer: Option, + #[sea_orm(column_type = "Text", nullable)] + pub content: Option, + pub mr_id: Option, + pub status: MergeStatus, + pub size: i32, + #[sea_orm(column_type = "Text")] + pub full_path: String, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/mega_issue.rs b/jupiter/entity/src/mega_issue.rs new file mode 100644 index 00000000..5132cb0e --- /dev/null +++ 
b/jupiter/entity/src/mega_issue.rs @@ -0,0 +1,23 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "mega_issue")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub number: i64, + pub title: String, + pub sender_name: String, + pub sender_id: i64, + pub state: String, + pub created_at: DateTime, + pub updated_at: DateTime, + pub closed_at: Option, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/mega_mr.rs b/jupiter/entity/src/mega_mr.rs new file mode 100644 index 00000000..7600b251 --- /dev/null +++ b/jupiter/entity/src/mega_mr.rs @@ -0,0 +1,21 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "mega_mr")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + pub mr_link: String, + pub mr_msg: String, + pub merge_date: Option, + pub status: String, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/mega_snapshot.rs b/jupiter/entity/src/mega_snapshot.rs new file mode 100644 index 00000000..02ba1a9e --- /dev/null +++ b/jupiter/entity/src/mega_snapshot.rs @@ -0,0 +1,24 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "mega_snapshot")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + #[sea_orm(column_type = "Text", unique)] + pub path: String, + pub import_dir: Option, + pub tree_id: Option, + pub sub_trees: Option>, + pub commit_id: Option, + pub size: i32, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/mega_tag.rs b/jupiter/entity/src/mega_tag.rs new file mode 100644 index 00000000..444cf321 --- /dev/null +++ b/jupiter/entity/src/mega_tag.rs @@ -0,0 +1,27 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "mega_tag")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + #[sea_orm(unique)] + pub tag_id: String, + pub object_id: String, + pub object_type: Option, + #[sea_orm(column_type = "Text")] + pub tag_name: String, + #[sea_orm(column_type = "Text")] + pub tagger: String, + #[sea_orm(column_type = "Text")] + pub message: String, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/mega_tree.rs b/jupiter/entity/src/mega_tree.rs new file mode 100644 index 00000000..c0a3b79d --- /dev/null +++ b/jupiter/entity/src/mega_tree.rs @@ -0,0 +1,26 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "mega_tree")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + #[sea_orm(unique)] + pub tree_id: String, + pub sub_trees: Option>, + pub import_dir: Option, + pub mr_id: Option, + pub status: String, + pub size: i32, + #[sea_orm(column_type = "Text")] + pub full_path: String, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/entity/src/prelude.rs b/jupiter/entity/src/prelude.rs new file mode 100644 index 00000000..99bec60c --- /dev/null +++ b/jupiter/entity/src/prelude.rs @@ -0,0 +1,20 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.11.3 + +pub use super::git_blob::Entity as GitBlob; +pub use super::git_commit::Entity as GitCommit; +pub use super::git_issue::Entity as GitIssue; +pub use super::git_pr::Entity as GitPr; +pub use super::git_refs::Entity as GitRefs; +pub use super::git_repo::Entity as GitRepo; +pub use super::git_tag::Entity as GitTag; +pub use super::git_tree::Entity as GitTree; +pub use super::lfs_locks::Entity as LfsLocks; +pub use super::lfs_objects::Entity as LfsObjects; +pub use super::mega_blob::Entity as MegaBlob; +pub use super::mega_commit::Entity as MegaCommit; +pub use super::mega_issue::Entity as MegaIssue; +pub use super::mega_mr::Entity as MegaMr; +pub use super::mega_snapshot::Entity as MegaSnapshot; +pub use super::mega_tag::Entity as MegaTag; +pub use super::mega_tree::Entity as MegaTree; +pub use super::raw_objects::Entity as RawObjects; diff --git a/jupiter/entity/src/raw_objects.rs b/jupiter/entity/src/raw_objects.rs new file mode 100644 index 00000000..6f5310e6 --- /dev/null +++ b/jupiter/entity/src/raw_objects.rs @@ -0,0 +1,27 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.11.3 + +use sea_orm::entity::prelude::*; + +use crate::db_enums::StorageType; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "raw_objects")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub id: i64, + #[sea_orm(unique)] + pub sha1: String, + pub object_type: String, + pub storage_type: StorageType, + #[sea_orm(column_type = "Binary(BlobSize::Blob(None))", nullable)] + pub data: Option>, + #[sea_orm(column_type = "Text", nullable)] + pub local_storage_path: Option, + #[sea_orm(column_type = "Text", nullable)] + pub remote_url: Option, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/jupiter/src/lib.rs b/jupiter/src/lib.rs new file mode 100644 index 00000000..969ebdf7 --- /dev/null +++ b/jupiter/src/lib.rs @@ -0,0 +1,2 @@ +pub mod raw_storage; +pub mod storage; diff --git a/jupiter/src/raw_storage/local_storage.rs b/jupiter/src/raw_storage/local_storage.rs new file mode 100644 index 00000000..ef20b203 --- /dev/null +++ b/jupiter/src/raw_storage/local_storage.rs @@ -0,0 +1,194 @@ +use std::fs::{self, OpenOptions}; +use std::io::prelude::*; +use std::path::{Path, PathBuf}; + +use async_trait::async_trait; +use bytes::Bytes; + +use common::errors::MegaError; +use db_entity::db_enums::StorageType; + +use crate::raw_storage::RawStorage; + +#[derive(Default)] +pub struct LocalStorage { + base_path: PathBuf, +} + +impl LocalStorage { + pub fn init(base_path: PathBuf) -> LocalStorage { + fs::create_dir_all(&base_path).expect("Create directory failed!"); + LocalStorage { base_path } + } +} + +#[async_trait] +impl RawStorage for LocalStorage { + fn get_storage_type(&self) -> StorageType { + StorageType::LocalFs + } + + async fn get_ref(&self, repo_name: &str, ref_name: &str) -> Result { + let path = Path::new(&self.base_path).join(repo_name).join(ref_name); + let mut file = 
fs::File::open(path)?; + let mut buf = String::new(); + file.read_to_string(&mut buf)?; + Ok(buf) + } + + async fn put_ref( + &self, + repo_name: &str, + ref_name: &str, + ref_hash: &str, + ) -> Result<(), MegaError> { + let path = Path::new(&self.base_path).join(repo_name).join(ref_name); + let parent = path.parent().unwrap(); + fs::create_dir_all(parent)?; + let mut file = fs::File::create(path)?; + file.write_all(ref_hash.as_bytes())?; + Ok(()) + } + + async fn delete_ref(&self, repo_name: &str, ref_name: &str) -> Result<(), MegaError> { + let path = Path::new(&self.base_path).join(repo_name).join(ref_name); + Ok(fs::remove_file(path)?) + } + + async fn update_ref( + &self, + repo_name: &str, + ref_name: &str, + ref_hash: &str, + ) -> Result<(), MegaError> { + let path = Path::new(&self.base_path).join(repo_name).join(ref_name); + let mut file = OpenOptions::new().write(true).open(path).unwrap(); + file.write_all(ref_hash.as_bytes()).unwrap(); + Ok(()) + } + + async fn get_object(&self, repo_name: &str, object_id: &str) -> Result { + let path = Path::new(&self.base_path) + .join(repo_name) + .join("objects") + .join(self.transform_path(object_id)); + let mut file = + fs::File::open(&path).unwrap_or_else(|_| panic!("Open file:{:?} failed!", path)); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + Ok(Bytes::from(buffer)) + } + + async fn put_object( + &self, + repo_name: &str, + object_id: &str, + body_content: &[u8], + ) -> Result { + let path = Path::new(&self.base_path) + .join(repo_name) + .join("objects") + .join(self.transform_path(object_id)); + let dir = path.parent().unwrap(); + fs::create_dir_all(dir).expect("Create directory failed!"); + + let mut file = fs::File::create(&path).expect("Open file failed"); + file.write_all(body_content).expect("Write file failed"); + Ok(path.to_str().unwrap().to_string()) + } + + fn exist_object(&self, repo_name: &str, object_id: &str) -> bool { + let path = Path::new(&self.base_path) + 
.join(repo_name) + .join("objects") + .join(self.transform_path(object_id)); + Path::exists(&path) + } +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::path::Path; + use std::{env, path::PathBuf}; + + use crate::raw_storage::{local_storage::LocalStorage, RawStorage}; + + // #[test] + #[tokio::test] + async fn test_content_store() { + let oid = "6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72".to_owned(); + let content = "test content".as_bytes().to_vec(); + + let mut source = PathBuf::from(env::current_dir().unwrap().parent().unwrap()); + source.push("tests/objects"); + + let local_storage = LocalStorage::init(source.clone()); + assert!(local_storage.put_object("", &oid, &content).await.is_ok()); + + assert!(local_storage.exist_object("", &oid)); + } + + #[tokio::test] + async fn test_put_ref() { + let test_path = PathBuf::from(env::current_dir().unwrap().parent().unwrap()).join("test"); + let storage = LocalStorage::init(test_path.clone()); + let ref_path = test_path.join("refs/tags/1.0"); + + storage + .put_ref( + "", + "refs/tags/1.0", + "5bb8ee25bac1014c15abc49c56d1ee0aab1050cb", + ) + .await + .unwrap(); + + assert!(Path::exists(&ref_path)); + fs::remove_file(ref_path).unwrap(); + } + + #[tokio::test] + async fn test_update_ref() { + let test_path = PathBuf::from(env::current_dir().unwrap().parent().unwrap()).join("tests"); + let storage = LocalStorage::init(test_path.clone()); + let ref_name = "refs/tags/2.0"; + let ref_path = test_path.join(ref_name); + + // init file + fs::write(&ref_path, "aa33dc413d3845d631d57169d87020f5c61c8652").unwrap(); + + // run test code + storage + .update_ref("", ref_name, "04ea005354bbbf8bf676fd97d8993a66ffeaa472") + .await + .unwrap(); + let buf = fs::read_to_string(&ref_path).unwrap(); + assert_eq!(buf, "04ea005354bbbf8bf676fd97d8993a66ffeaa472"); + // clean up resources + fs::remove_file(ref_path).unwrap(); + } + + #[tokio::test] + async fn test_delete_ref() { + let test_path = 
PathBuf::from(env::current_dir().unwrap().parent().unwrap()).join("tests"); + let ref_name = "refs/tags/3.0"; + let ref_path = test_path.join(ref_name); + + fs::write(&ref_path, "5bb8ee25bac1014c15abc49c56d1ee0aab1050cb").unwrap(); + + let storage = LocalStorage::init(test_path.clone()); + storage.delete_ref("", ref_name).await.unwrap(); + let ref_path = test_path.join(ref_name); + assert!(!Path::exists(&ref_path)); + } + + #[tokio::test] + async fn test_get_ref() { + let mut test_path = PathBuf::from(env::current_dir().unwrap().parent().unwrap()); + test_path.push("tests"); + let storage = LocalStorage::init(test_path.clone()); + let ref_hash = storage.get_ref("", "refs/heads/master").await.unwrap(); + assert_eq!(ref_hash, "5bb8ee25bac1014c15abc49c56d1ee0aab1050cb") + } +} diff --git a/jupiter/src/raw_storage/mod.rs b/jupiter/src/raw_storage/mod.rs new file mode 100644 index 00000000..8a7523e3 --- /dev/null +++ b/jupiter/src/raw_storage/mod.rs @@ -0,0 +1,128 @@ +use std::{ + env, + fs::File, + io::Read, + path::{self, PathBuf}, + sync::Arc, +}; + +use async_trait::async_trait; +use bytes::Bytes; +use handlebars::Handlebars; + +use common::errors::MegaError; +use db_entity::db_enums::StorageType; +use venus::internal::pack::entry::Entry; + +use crate::raw_storage::local_storage::LocalStorage; + +pub mod local_storage; + +#[derive(Debug, Clone, Default)] +pub struct BlobLink { + pub version: String, + pub object_type: String, + pub storage_type: String, + pub storge_location: String, +} + +#[async_trait] +pub trait RawStorage: Sync + Send { + fn get_storage_type(&self) -> StorageType; + + async fn get_ref(&self, repo_name: &str, ref_name: &str) -> Result; + + async fn put_ref(&self, repo_name: &str, ref_name: &str, ref_hash: &str) -> Result<(), MegaError>; + + async fn delete_ref(&self, repo_name: &str, ref_name: &str) -> Result<(), MegaError>; + + async fn update_ref(&self, repo_name: &str, ref_name: &str, ref_hash: &str) -> Result<(), MegaError>; + + async fn 
get_object(&self, repo_name: &str, object_id: &str) -> Result; + + // async fn parse_blob_link(&self, data: Vec) -> Result { + // let mut reader = BufReader::new(data.as_slice()); + // let mut blink = BlobLink::default(); + // // for line in reader.lines() { + // // let str = line.unwrap(); + // // } + // let mut buf = String::new(); + // reader.read_line(&mut buf).unwrap(); + // blink.version = buf.split_whitespace().next(); + // let result = self.get_by_path(&blink.storge_location).await.unwrap(); + // Ok(blink) + // } + + async fn put_object( + &self, + repo_name: &str, + object_id: &str, + body_content: &[u8], + ) -> Result; + + // save a entry and return the b_link file + async fn put_entry(&self, repo_name: &str, entry: &Entry) -> Result, MegaError> { + let location = self + .put_object( + repo_name, + &entry.hash.unwrap().to_plain_str(), + &entry.data, + ) + .await + .unwrap(); + let handlebars = Handlebars::new(); + + let path = env::current_dir().unwrap().join("b_link.txt"); + let mut file = File::open(path).unwrap(); + let mut template = String::new(); + file.read_to_string(&mut template).unwrap(); + + let mut context = serde_json::Map::new(); + context.insert( + "objectType".to_string(), + serde_json::json!(entry.header.to_string()), + ); + context.insert( + "sha1".to_string(), + serde_json::json!(entry.hash.unwrap().to_plain_str()), + ); + context.insert( + "type".to_string(), + serde_json::json!(self.get_storage_type().to_string()), + ); + context.insert("location".to_string(), serde_json::json!(location)); + + let rendered = handlebars.render_template(&template, &context).unwrap(); + + Ok(rendered.into_bytes()) + } + + fn exist_object(&self, repo_name: &str, object_id: &str) -> bool; + + fn transform_path(&self, sha1: &str) -> String { + if sha1.len() < 5 { + sha1.to_string() + } else { + path::Path::new(&sha1[0..2]) + .join(&sha1[2..4]) + .join(&sha1[4..sha1.len()]) + .into_os_string() + .into_string() + .unwrap() + } + } +} + +pub async fn init() -> 
Arc { + let storage_type = env::var("MEGA_RAW_STORAGR").unwrap(); + match storage_type.as_str() { + "LOCAL" => { + let base_path = PathBuf::from(env::var("MEGA_OBJ_LOCAL_PATH").unwrap()); + Arc::new(LocalStorage::init(base_path)) + } + // "REMOTE" => Arc::new(RemoteStorage::init(path).await), + _ => unreachable!( + "Not supported config, MEGA_OBJ_STORAGR_TYPE should be 'LOCAL' or 'REMOTE'" + ), + } +} diff --git a/jupiter/src/storage/git_storage.rs b/jupiter/src/storage/git_storage.rs new file mode 100644 index 00000000..da4ae127 --- /dev/null +++ b/jupiter/src/storage/git_storage.rs @@ -0,0 +1,89 @@ +use std::{io::Cursor, sync::Arc}; + +use async_trait::async_trait; + +use common::errors::MegaError; +use venus::{ + hash::SHA1, + internal::{ + object::{types::ObjectType, utils}, + pack::{entry::Entry, header::EntryHeader, reference::RefCommand}, + repo::Repo, + }, +}; + +use crate::{ + raw_storage::{self, RawStorage}, + storage::StorageProvider, +}; + +pub struct GitStorage { + pub rawobj_storage: Arc, +} + +#[async_trait] +impl StorageProvider for GitStorage { + async fn save_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError> { + self.rawobj_storage + .put_ref(&repo.repo_name, &refs.ref_name, &refs.new_id) + .await + } + + async fn remove_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError> { + self.rawobj_storage + .delete_ref(&repo.repo_name, &refs.ref_name) + .await + } + + async fn get_ref(&self, repo: Repo, refs: RefCommand) -> Result { + self.rawobj_storage + .get_ref(&repo.repo_name, &refs.ref_name) + .await + } + + async fn update_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError> { + self.rawobj_storage + .update_ref(&repo.repo_name, &refs.ref_name, &refs.new_id) + .await + } + + async fn save_entry(&self, repo: Repo, result_entity: Vec) -> Result<(), MegaError> { + for entry in result_entity { + self.rawobj_storage + .put_object( + &repo.repo_name, + &entry.hash.unwrap().to_plain_str(), + &entry.data, + ) + 
.await + .unwrap(); + } + Ok(()) + } + + async fn get_entry_by_sha1(&self, repo: Repo, sha1_vec: Vec<&str>) -> Result, MegaError> { + let mut res: Vec = Vec::new(); + for sha1 in sha1_vec { + let data = self.rawobj_storage.get_object(&repo.repo_name, sha1).await.unwrap(); + let (type_num, _) = utils::read_type_and_size(&mut Cursor::new(&data)).unwrap(); + let o_type = ObjectType::from_u8(type_num).unwrap(); + let header = EntryHeader::from_string(&o_type.to_string()); + let sha1 = SHA1::new(&data.to_vec()); + res.push(Entry { + header, + offset: 0, + data: data.to_vec(), + hash: Some(sha1), + }) + } + Ok(res) + } +} + +impl GitStorage { + pub async fn new() -> Self { + GitStorage { + rawobj_storage: raw_storage::init().await, + } + } +} diff --git a/jupiter/src/storage/mega_storage.rs b/jupiter/src/storage/mega_storage.rs new file mode 100644 index 00000000..5a6953ed --- /dev/null +++ b/jupiter/src/storage/mega_storage.rs @@ -0,0 +1,246 @@ +use std::{env, sync::Arc}; + +use async_trait::async_trait; +use sea_orm::{ + sea_query::OnConflict, ActiveModelTrait, ColumnTrait, ConnectionTrait, DatabaseConnection, + EntityTrait, IntoActiveModel, QueryFilter, Set, +}; + +use common::errors::MegaError; +use db_entity::{db_enums::StorageType, git_commit, git_refs, mega_commit, raw_objects}; +use venus::internal::{ + object::commit::Commit, + pack::{entry::Entry, reference::RefCommand}, + repo::Repo, +}; + +use crate::{ + raw_storage::{self, RawStorage}, + storage::StorageProvider, +}; + +use crate::storage::MegaStorageProvider; + +pub struct MegaStorage { + pub raw_storage: Arc, + pub connection: DatabaseConnection, +} + +#[async_trait] +impl StorageProvider for MegaStorage { + async fn save_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError> { + let mut model: git_refs::Model = refs.clone().into(); + model.ref_git_id = refs.new_id; + model.repo_id = repo.repo_id; + let a_model = model.into_active_model(); + git_refs::Entity::insert(a_model) + 
.exec(self.get_connection()) + .await + .unwrap(); + Ok(()) + } + + async fn remove_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError> { + git_refs::Entity::delete_many() + .filter(git_refs::Column::RepoId.eq(repo.repo_id)) + .filter(git_refs::Column::RefName.eq(refs.ref_name)) + .exec(self.get_connection()) + .await?; + Ok(()) + } + + async fn get_ref(&self, repo: Repo, refs: RefCommand) -> Result { + let result = git_refs::Entity::find() + .filter(git_refs::Column::RepoId.eq(repo.repo_id)) + .filter(git_refs::Column::RefName.eq(refs.ref_name)) + .one(self.get_connection()) + .await?; + if let Some(model) = result { + return Ok(model.ref_git_id); + } + Ok(String::new()) + } + + async fn update_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError> { + let ref_data: Option = git_refs::Entity::find() + .filter(git_refs::Column::RepoId.eq(repo.repo_id)) + .filter(git_refs::Column::RefName.eq(refs.ref_name)) + .one(self.get_connection()) + .await + .unwrap(); + let mut ref_data: git_refs::ActiveModel = ref_data.unwrap().into(); + ref_data.ref_git_id = Set(refs.new_id); + ref_data.updated_at = Set(chrono::Utc::now().naive_utc()); + ref_data.update(self.get_connection()).await.unwrap(); + Ok(()) + } + + async fn save_entry(&self, repo: Repo, result_entity: Vec) -> Result<(), MegaError> { + let threshold = env::var("MEGA_BIG_OBJ_THRESHOLD_SIZE") + .expect("MEGA_BIG_OBJ_THRESHOLD_SIZE not configured") + .parse::() + .unwrap(); + + let mut save_models: Vec = Vec::new(); + for entry in result_entity.iter() { + let mut model: raw_objects::Model = entry.clone().into(); + let data = model.data.clone().unwrap(); + // save data through raw_storgae insted of database if exceed threshold + if threshold != 0 && data.len() / 1024 > threshold { + let b_link = self.raw_storage.put_entry(&repo.repo_name, entry).await.unwrap(); + model.storage_type = self.raw_storage.get_storage_type(); + model.data = Some(b_link); + } + 
save_models.push(model.into_active_model()) + } + batch_save_model(self.get_connection(), save_models) + .await + .unwrap(); + Ok(()) + } + + async fn get_entry_by_sha1(&self, repo: Repo, sha1_vec: Vec<&str>) -> Result, MegaError> { + let models = raw_objects::Entity::find() + .filter(raw_objects::Column::Sha1.is_in(sha1_vec)) + .all(self.get_connection()) + .await + .unwrap(); + let mut result: Vec = Vec::new(); + for mut model in models { + if model.storage_type == StorageType::Database { + result.push(model.into()); + } else { + let data = self.raw_storage.get_object(&repo.repo_name, &model.sha1).await.unwrap(); + model.data = Some(data.to_vec()); + result.push(model.into()); + } + } + Ok(result) + } +} + +#[async_trait] +impl MegaStorageProvider for MegaStorage { + async fn save_git_commits( + &self, + repo_id: i64, + full_path: &str, + commits: Vec, + ) -> Result<(), MegaError> { + let git_commits: Vec = + commits.into_iter().map(git_commit::Model::from).collect(); + let mut save_models = Vec::new(); + for mut git_commit in git_commits { + git_commit.full_path = full_path.to_string(); + git_commit.repo_id = repo_id; + save_models.push(git_commit.into_active_model()); + } + batch_save_model(self.get_connection(), save_models) + .await + .unwrap(); + Ok(()) + } + + async fn save_mega_commits( + &self, + mr_id: &str, + full_path: &str, + commits: Vec, + ) -> Result<(), MegaError> { + let mega_commits: Vec = + commits.into_iter().map(mega_commit::Model::from).collect(); + let mut save_models = Vec::new(); + for mut mega_commit in mega_commits { + mega_commit.full_path = full_path.to_string(); + mega_commit.mr_id = Some(mr_id.to_string()); + save_models.push(mega_commit.into_active_model()); + } + batch_save_model(self.get_connection(), save_models) + .await + .unwrap(); + Ok(()) + } +} + +impl MegaStorage { + pub fn get_connection(&self) -> &DatabaseConnection { + &self.connection + } + + pub async fn new(connection: DatabaseConnection) -> Self { + MegaStorage { + 
connection, + raw_storage: raw_storage::init().await, + } + } +} + +/// Performs batch saving of models in the database. +/// +/// The method takes a vector of models to be saved and performs batch inserts using the given entity type `E`. +/// The models should implement the `ActiveModelTrait` trait, which provides the necessary functionality for saving and inserting the models. +/// +/// The method splits the models into smaller chunks, each containing models configured bu chunk_size, and inserts them into the database using the `E::insert_many` function. +/// The results of each insertion are collected into a vector of futures. +/// +/// Note: Currently, SQLx does not support packets larger than 16MB. +/// +/// +/// # Arguments +/// +/// * `save_models` - A vector of models to be saved. +/// +/// # Generic Constraints +/// +/// * `E` - The entity type that implements the `EntityTrait` trait. +/// * `A` - The model type that implements the `ActiveModelTrait` trait and is convertible from the corresponding model type of `E`. +/// +/// # Errors +/// +/// Returns a `MegaError` if an error occurs during the batch save operation. 
+pub async fn batch_save_model( + connection: &impl ConnectionTrait, + save_models: Vec, +) -> Result<(), MegaError> +where + E: EntityTrait, + A: ActiveModelTrait + From<::Model> + Send, +{ + let mut results = Vec::new(); + for chunk in save_models.chunks(1000) { + // notice that sqlx not support packets larger than 16MB now + let res = E::insert_many(chunk.iter().cloned()) + .on_conflict(OnConflict::new().do_nothing().to_owned()) + .exec(connection); + results.push(res); + } + futures::future::join_all(results).await; + Ok(()) +} + +#[allow(unused)] +async fn batch_query_by_columns( + connection: &DatabaseConnection, + column: C, + ids: Vec, + filter_column: Option, + value: Option, +) -> Result, MegaError> +where + T: EntityTrait, + C: ColumnTrait, +{ + let mut result = Vec::::new(); + for chunk in ids.chunks(1000) { + let query_builder = T::find().filter(column.is_in(chunk)); + + // Conditionally add the filter based on the value parameter + let query_builder = match value { + Some(ref v) => query_builder.filter(filter_column.unwrap().eq(v)), + None => query_builder, + }; + + result.extend(query_builder.all(connection).await?); + } + Ok(result) +} diff --git a/jupiter/src/storage/mod.rs b/jupiter/src/storage/mod.rs new file mode 100644 index 00000000..a94b673b --- /dev/null +++ b/jupiter/src/storage/mod.rs @@ -0,0 +1,67 @@ +pub mod git_storage; +pub mod mega_storage; + +use async_trait::async_trait; + +use common::errors::MegaError; +use venus::internal::{ + object::{commit::Commit, tree::Tree}, + pack::{entry::Entry, reference::RefCommand}, + repo::Repo, +}; + +#[async_trait] +pub trait StorageProvider: Send + Sync { + async fn save_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError>; + + async fn remove_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError>; + + async fn get_ref(&self, repo: Repo, refs: RefCommand) -> Result; + + async fn update_ref(&self, repo: Repo, refs: RefCommand) -> Result<(), MegaError>; + + async fn 
save_entry(&self, repo: Repo, result_entity: Vec) -> Result<(), MegaError>; + + async fn get_entry_by_sha1( + &self, + repo: Repo, + sha1_vec: Vec<&str>, + ) -> Result, MegaError>; +} + +#[async_trait] +pub trait DbStorageProvider: StorageProvider { + async fn save_commits(&self, commits: Vec) -> Result<(), MegaError>; + + async fn save_trees(&self, trees: Vec) -> Result<(), MegaError>; +} + +#[async_trait] +pub trait MegaStorageProvider: StorageProvider { + + async fn save_git_repo(&self) { + todo!() + } + + async fn update_git_repo(&self) { + todo!() + } + + async fn save_git_trees(&self) { + todo!() + } + + async fn save_git_commits( + &self, + repo_id: i64, + full_path: &str, + commits: Vec, + ) -> Result<(), MegaError>; + + async fn save_mega_commits( + &self, + mr_id: &str, + full_path: &str, + commits: Vec, + ) -> Result<(), MegaError>; +} diff --git a/mercury/Cargo.toml b/mercury/Cargo.toml index 29386bae..a08d16e2 100644 --- a/mercury/Cargo.toml +++ b/mercury/Cargo.toml @@ -7,7 +7,10 @@ edition = "2021" [dependencies] common = { path = "../common" } -sha1_smol = "1.0.0" +db_entity = { path = "../jupiter/entity" } +venus ={ path = "../venus"} + +# sha1_smol = "1.0.0" serde = { workspace = true, features = ["derive"] } bstr = { workspace = true } @@ -16,4 +19,5 @@ thiserror = { workspace = true } flate2 = { workspace = true } tracing = { workspace = true } sha1 = { workspace = true } -colored = {workspace = true} +colored = { workspace = true } +chrono = { workspace = true } diff --git a/mercury/src/internal/mod.rs b/mercury/src/internal/mod.rs index 510ae3eb..49bd7422 100644 --- a/mercury/src/internal/mod.rs +++ b/mercury/src/internal/mod.rs @@ -4,5 +4,4 @@ //! -pub mod object; pub mod pack; \ No newline at end of file diff --git a/mercury/src/internal/object/utils.rs b/mercury/src/internal/object/utils.rs deleted file mode 100644 index 5969a9ed..00000000 --- a/mercury/src/internal/object/utils.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! -//! -//! -//! 
- -/// Parses a byte slice into a `usize` representing the size of a Git object. -/// -/// This function is intended to be used for converting the bytes, which represent the size portion -/// in a Git object, back into a `usize`. This size is typically compared with the actual length of -/// the object's data part to ensure data integrity. -/// -/// # Parameters -/// * `bytes`: A byte slice (`&[u8]`) representing the size in a serialized Git object. -/// -/// # Returns -/// Returns a `Result` which is: -/// * `Ok(usize)`: On successful parsing, returns the size as a `usize`. -/// * `Err(Box)`: On failure, returns an error in a Box. This error could be -/// due to invalid UTF-8 encoding in the byte slice or a failure to parse the byte slice as a `usize`. -/// -/// # Errors -/// This function handles two main types of errors: -/// 1. `Utf8Error`: If the byte slice is not a valid UTF-8 string, which is necessary for the size representation. -/// 2. `ParseIntError`: If the byte slice does not represent a valid `usize` value. -pub fn parse_size_from_bytes(bytes: &[u8]) -> Result> { - let size_str = std::str::from_utf8(bytes)?; - Ok(size_str.parse::()?) 
-} - -#[cfg(test)] -mod tests { - use crate::internal::object::utils::parse_size_from_bytes; - - #[test] - fn test_parse_size_from_bytes() -> Result<(), Box> { - let size: usize = 12345; - let size_bytes = size.to_string().as_bytes().to_vec(); - - let parsed_size = parse_size_from_bytes(&size_bytes)?; - - assert_eq!(size, parsed_size); - Ok(()) - } -} \ No newline at end of file diff --git a/mercury/src/internal/pack/cache.rs b/mercury/src/internal/pack/cache.rs index 0d267ca3..82bba91f 100644 --- a/mercury/src/internal/pack/cache.rs +++ b/mercury/src/internal/pack/cache.rs @@ -8,9 +8,9 @@ use std::collections::HashMap; use std::path::PathBuf; -use crate::hash::SHA1; -use crate::internal::object::types::ObjectType; -use crate::internal::object::ObjectTrait; +use venus::hash::SHA1; +use venus::internal::object::types::ObjectType; +use venus::internal::object::ObjectTrait; #[allow(unused)] #[derive(Debug, Clone)] diff --git a/mercury/src/internal/pack/decode.rs b/mercury/src/internal/pack/decode.rs index b8c5e799..f294a492 100644 --- a/mercury/src/internal/pack/decode.rs +++ b/mercury/src/internal/pack/decode.rs @@ -11,9 +11,10 @@ use std::path::PathBuf; use flate2::bufread::ZlibDecoder; -use crate::errors::GitError; -use crate::hash::SHA1; -use crate::internal::object::types::ObjectType; +use venus::errors::GitError; +use venus::hash::SHA1; +use venus::internal::object::types::ObjectType; + use crate::internal::pack::Pack; use crate::internal::pack::wrapper::Wrapper; use crate::internal::pack::utils::read_type_and_varint_size; @@ -321,7 +322,7 @@ mod tests { use flate2::write::ZlibEncoder; use flate2::Compression; - use crate::hash::SHA1; + use venus::hash::SHA1; use crate::internal::pack::Pack; #[test] diff --git a/mercury/src/internal/pack/mod.rs b/mercury/src/internal/pack/mod.rs index 1edeb9b4..721ede27 100644 --- a/mercury/src/internal/pack/mod.rs +++ b/mercury/src/internal/pack/mod.rs @@ -8,7 +8,8 @@ pub mod wrapper; pub mod utils; pub mod cache; -use 
crate::hash::SHA1; + +use venus::hash::SHA1; /// /// diff --git a/mercury/src/internal/pack/wrapper.rs b/mercury/src/internal/pack/wrapper.rs index 22237aff..3d44bcb3 100644 --- a/mercury/src/internal/pack/wrapper.rs +++ b/mercury/src/internal/pack/wrapper.rs @@ -7,7 +7,7 @@ use std::io::{self, Read, BufRead}; use sha1::{Sha1, Digest}; -use crate::hash::SHA1; +use venus::hash::SHA1; /// `Wrapper` is a wrapper around a reader that also computes the SHA1 hash of the data read. /// diff --git a/mercury/src/lib.rs b/mercury/src/lib.rs index 7524a5dc..0f95c76a 100644 --- a/mercury/src/lib.rs +++ b/mercury/src/lib.rs @@ -1,15 +1,10 @@ //! Mercury is a library for encode and decode Git Pack format file or stream. -//! //! //! - +//! pub mod cache; pub mod internal; -pub mod hash; -pub mod errors; #[cfg(test)] -mod tests { - -} +mod tests {} diff --git a/sql/postgres/pg_20240205__init.sql b/sql/postgres/pg_20240205__init.sql index 6f0134e2..f11b03a4 100644 --- a/sql/postgres/pg_20240205__init.sql +++ b/sql/postgres/pg_20240205__init.sql @@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS "mega_commit" ( "id" BIGINT PRIMARY KEY, "commit_id" VARCHAR(40) NOT NULL, "tree" VARCHAR(40) NOT NULL, - "parents_id" TEXT [], + "parents_id" TEXT [] NOT NULL, "author" TEXT, "committer" TEXT, "content" TEXT, @@ -72,7 +72,7 @@ CREATE TABLE IF NOT EXISTS "mega_mr" ( "id" BIGINT PRIMARY KEY, "mr_link" VARCHAR(40) NOT NULL, "mr_msg" VARCHAR(255) NOT NULL, - "merge_date" TIMESTAMP NOT NULL, + "merge_date" TIMESTAMP, "status" VARCHAR(20) NOT NULL, "created_at" TIMESTAMP NOT NULL, "updated_at" TIMESTAMP NOT NULL @@ -94,6 +94,7 @@ CREATE TABLE IF NOT EXISTS "git_refs" ( "repo_id" BIGINT NOT NULL, "ref_name" TEXT NOT NULL, "ref_git_id" VARCHAR(40) NOT NULL, + "ref_type" VARCHAR(20) NOT NULL, "created_at" TIMESTAMP NOT NULL, "updated_at" TIMESTAMP NOT NULL, CONSTRAINT uniq_ref_path_name UNIQUE (repo_id, ref_name) @@ -112,7 +113,7 @@ CREATE TABLE IF NOT EXISTS "git_commit" ( "repo_id" BIGINT NOT NULL, 
"commit_id" VARCHAR(40) NOT NULL, "tree" VARCHAR(40) NOT NULL, - "pid" TEXT [], + "parents_id" TEXT [] NOT NULL, "author" TEXT, "committer" TEXT, "content" TEXT, diff --git a/tests/refs/heads/master b/tests/refs/heads/master new file mode 100644 index 00000000..ce2fedc3 --- /dev/null +++ b/tests/refs/heads/master @@ -0,0 +1 @@ +5bb8ee25bac1014c15abc49c56d1ee0aab1050cb \ No newline at end of file diff --git a/venus/Cargo.toml b/venus/Cargo.toml new file mode 100644 index 00000000..cd6d2f1d --- /dev/null +++ b/venus/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "venus" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +common = { path = "../common" } +db_entity = { path = "../jupiter/entity" } +sha1_smol = "1.0.0" + +serde = { workspace = true, features = ["derive"] } +bstr = { workspace = true } +hex = { workspace = true } +thiserror = { workspace = true } +flate2 = { workspace = true } +tracing = { workspace = true } +sha1 = { workspace = true } +colored = { workspace = true } +chrono = { workspace = true } \ No newline at end of file diff --git a/venus/src/db_convert/commit.rs b/venus/src/db_convert/commit.rs new file mode 100644 index 00000000..db0684d0 --- /dev/null +++ b/venus/src/db_convert/commit.rs @@ -0,0 +1,72 @@ +use std::str::FromStr; + +use common::utils::generate_id; +use db_entity::{db_enums::MergeStatus, git_commit, mega_commit}; + +use crate::{ + hash::SHA1, + internal::object::{commit::Commit, signature::Signature, ObjectTrait}, +}; + +impl From for Commit { + fn from(value: git_commit::Model) -> Self { + Commit { + id: SHA1::from_str(&value.commit_id).unwrap(), + tree_id: SHA1::from_str(&value.tree).unwrap(), + parent_commit_ids: value + .parents_id + .into_iter() + .map(|id| SHA1::from_str(&id).unwrap()) + .collect(), + author: Signature::new_from_data(value.author.unwrap().into()).unwrap(), + committer: 
Signature::new_from_data(value.committer.unwrap().into()).unwrap(), + message: value.content.unwrap(), + } + } +} + +impl From for git_commit::Model { + fn from(value: Commit) -> Self { + git_commit::Model { + id: generate_id(), + repo_id: 0, + commit_id: value.id.to_plain_str(), + tree: value.tree_id.to_plain_str(), + parents_id: value + .parent_commit_ids + .iter() + .map(|x| x.to_plain_str()) + .collect(), + author: Some(value.author.to_string()), + committer: Some(value.committer.to_string()), + content: Some(value.message.clone()), + size: value.get_size() as i32, + full_path: "".to_string(), + created_at: chrono::Utc::now().naive_utc(), + } + } +} + +impl From for mega_commit::Model { + fn from(value: Commit) -> Self { + mega_commit::Model { + id: generate_id(), + commit_id: value.id.to_plain_str(), + tree: value.tree_id.to_plain_str(), + parents_id: value + .parent_commit_ids + .iter() + .map(|x| x.to_plain_str()) + .collect(), + author: Some(value.author.to_string()), + committer: Some(value.committer.to_string()), + content: Some(value.message.clone()), + size: value.get_size() as i32, + full_path: "".to_string(), + mr_id: None, + status: MergeStatus::Open, + created_at: chrono::Utc::now().naive_utc(), + updated_at: chrono::Utc::now().naive_utc(), + } + } +} diff --git a/venus/src/db_convert/entry.rs b/venus/src/db_convert/entry.rs new file mode 100644 index 00000000..872dd569 --- /dev/null +++ b/venus/src/db_convert/entry.rs @@ -0,0 +1,34 @@ +use std::str::FromStr; + +use common::utils::generate_id; +use db_entity::{db_enums::StorageType, raw_objects}; + +use crate::{ + hash::SHA1, + internal::pack::{entry::Entry, header::EntryHeader}, +}; + +impl From for raw_objects::Model { + fn from(value: Entry) -> Self { + raw_objects::Model { + id: generate_id(), + sha1: value.hash.unwrap().to_plain_str(), + object_type: String::from_utf8_lossy(value.header.to_bytes()).to_string(), + storage_type: StorageType::Database, + data: Some(value.data), + 
local_storage_path: None, + remote_url: None, + } + } +} + +impl From for Entry { + fn from(value: raw_objects::Model) -> Self { + Entry { + header: EntryHeader::from_string(&value.object_type), + offset: 0, + data: value.data.unwrap(), + hash: Some(SHA1::from_str(&value.sha1).unwrap()), + } + } +} diff --git a/venus/src/db_convert/mod.rs b/venus/src/db_convert/mod.rs new file mode 100644 index 00000000..f239a5f0 --- /dev/null +++ b/venus/src/db_convert/mod.rs @@ -0,0 +1,4 @@ +pub mod commit; +pub mod entry; +pub mod reference; +pub mod repo; diff --git a/venus/src/db_convert/reference.rs b/venus/src/db_convert/reference.rs new file mode 100644 index 00000000..e9947338 --- /dev/null +++ b/venus/src/db_convert/reference.rs @@ -0,0 +1,18 @@ +use common::utils::generate_id; +use db_entity::git_refs; + +use crate::internal::pack::reference::RefCommand; + +impl From for git_refs::Model { + fn from(value: RefCommand) -> Self { + git_refs::Model { + id: generate_id(), + repo_id: 0, + ref_name: value.ref_name, + ref_git_id: String::new(), + ref_type: value.ref_type, + created_at: chrono::Utc::now().naive_utc(), + updated_at: chrono::Utc::now().naive_utc(), + } + } +} diff --git a/venus/src/db_convert/repo.rs b/venus/src/db_convert/repo.rs new file mode 100644 index 00000000..ff4702e8 --- /dev/null +++ b/venus/src/db_convert/repo.rs @@ -0,0 +1,14 @@ +use db_entity::git_repo; + +use crate::internal::repo::Repo; + +impl From for git_repo::Model { + fn from(value: Repo) -> Self { + git_repo::Model { + id: value.repo_id, + repo_path: value.repo_path, + created_at: chrono::Utc::now().naive_utc(), + updated_at: chrono::Utc::now().naive_utc(), + } + } +} diff --git a/mercury/src/errors.rs b/venus/src/errors.rs similarity index 100% rename from mercury/src/errors.rs rename to venus/src/errors.rs diff --git a/mercury/src/hash.rs b/venus/src/hash.rs similarity index 100% rename from mercury/src/hash.rs rename to venus/src/hash.rs diff --git a/venus/src/internal/mod.rs 
b/venus/src/internal/mod.rs new file mode 100644 index 00000000..569a9fee --- /dev/null +++ b/venus/src/internal/mod.rs @@ -0,0 +1,3 @@ +pub mod object; +pub mod pack; +pub mod repo; diff --git a/mercury/src/internal/object/blob.rs b/venus/src/internal/object/blob.rs similarity index 100% rename from mercury/src/internal/object/blob.rs rename to venus/src/internal/object/blob.rs diff --git a/venus/src/internal/object/commit.rs b/venus/src/internal/object/commit.rs new file mode 100644 index 00000000..d9f0c446 --- /dev/null +++ b/venus/src/internal/object/commit.rs @@ -0,0 +1,151 @@ +//! The Commit object is is a data structure used to represent a specific version of a project's +//! files at a particular point in time. In Git, the commit object is a fundamental data structure +//! that is used to track changes to a repository's files over time. Whenever a developer makes +//! changes to the files in a repository, they create a new commit object that records those changes. +//! +//! Each commit object in Git contains the following information: +//! +//! - A unique SHA-1 hash that identifies the commit. +//! - The author and committer of the commit (which may be different people). +//! - The date and time the commit was made. +//! - A commit message that describes the changes made in the commit. +//! - A reference to the parent commit or commits (in the case of a merge commit) that the new commit is based on. +//! - The contents of the files in the repository at the time the commit was made. +//! +//! +//! +use std::fmt::Display; +use std::str::FromStr; + +use bstr::ByteSlice; + +use crate::errors::GitError; +use crate::hash::SHA1; +use crate::internal::object::signature::Signature; +use crate::internal::object::ObjectTrait; +use crate::internal::object::ObjectType; + +/// The `Commit` struct is used to represent a commit object. 
+/// +/// - The tree object SHA points to the top level tree for this commit, which reflects the complete +/// state of the repository at the time of the commit. The tree object in turn points to blobs and +/// subtrees which represent the files in the repository. +/// - The parent commit SHAs allow Git to construct a linked list of commits and build the full +/// commit history. By chaining together commits in this fashion, Git is able to represent the entire +/// history of a repository with a single commit object at its root. +/// - The author and committer fields contain the name, email address, timestamp and timezone. +/// - The message field contains the commit message, which maybe include signed or DCO. +#[allow(unused)] +#[derive(Eq, Debug, Clone)] +pub struct Commit { + pub id: SHA1, + pub tree_id: SHA1, + pub parent_commit_ids: Vec, + pub author: Signature, + pub committer: Signature, + pub message: String, +} + +impl PartialEq for Commit { + fn eq(&self, other: &Self) -> bool { + self.tree_id == other.tree_id + } +} + +impl Display for Commit { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "tree: {}", self.tree_id)?; + for parent in self.parent_commit_ids.iter() { + writeln!(f, "parent: {}", parent)?; + } + writeln!(f, "author {}", self.author)?; + writeln!(f, "committer {}", self.committer)?; + writeln!(f, "{}", self.message) + } +} + +impl Commit { + #[allow(unused)] + pub fn to_data(&self) -> Result, GitError> { + let mut data = Vec::new(); + + data.extend(b"tree "); + data.extend(self.tree_id.to_plain_str().as_bytes()); + data.extend(&[0x0a]); + + for parent_tree_id in &self.parent_commit_ids { + data.extend(b"parent "); + data.extend(parent_tree_id.to_plain_str().as_bytes()); + data.extend(&[0x0a]); + } + + data.extend(self.author.to_data()?); + data.extend(&[0x0a]); + data.extend(self.committer.to_data()?); + data.extend(&[0x0a]); + data.extend(self.message.as_bytes()); + + Ok(data) + } +} + +impl ObjectTrait for 
Commit { + fn get_type(&self) -> ObjectType { + ObjectType::Commit + } + + fn get_size(&self) -> usize { + todo!() + } + + fn from_bytes(data: Vec) -> Result + where + Self: Sized, + { + let mut commit = data; + // Find the tree id and remove it from the data + let tree_end = commit.find_byte(0x0a).unwrap(); + let tree_id: SHA1 = SHA1::from_str( + String::from_utf8(commit[5..tree_end].to_owned()) + .unwrap() + .as_str(), + ).unwrap(); + commit = commit[tree_end + 1..].to_vec(); + + // Find the parent commit ids and remove them from the data + let author_begin = commit.find("author").unwrap(); + let parent_commit_ids: Vec = commit[..author_begin] + .find_iter("parent") + .map(|parent| { + let parent_end = commit[parent..].find_byte(0x0a).unwrap(); + SHA1::from_str( + String::from_utf8(commit[parent + 7..parent + parent_end].to_owned()) + .unwrap() + .as_str(), + ).unwrap() + }) + .collect(); + commit = commit[author_begin..].to_vec(); + + // Find the author and committer and remove them from the data + let author = + Signature::new_from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap(); + commit = commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec(); + let committer = + Signature::new_from_data(commit[..commit.find_byte(0x0a).unwrap()].to_vec()).unwrap(); + + // The rest is the message + let message = unsafe { + String::from_utf8_unchecked(commit[commit.find_byte(0x0a).unwrap() + 1..].to_vec()) + }; + + Ok(Commit { + id: SHA1([0u8; 20]), + tree_id, + parent_commit_ids, + author, + committer, + message, + }) + } +} diff --git a/mercury/src/internal/object/mod.rs b/venus/src/internal/object/mod.rs similarity index 62% rename from mercury/src/internal/object/mod.rs rename to venus/src/internal/object/mod.rs index 84095369..5802cf2b 100644 --- a/mercury/src/internal/object/mod.rs +++ b/venus/src/internal/object/mod.rs @@ -1,24 +1,24 @@ -//! -//! -//! -//! -//! 
+pub mod blob; +pub mod commit; +pub mod signature; +pub mod tree; pub mod types; pub mod utils; -pub mod blob; use std::fmt::Display; use crate::errors::GitError; use crate::internal::object::types::ObjectType; -pub trait ObjectTrait: Send + Sync + Display{ +pub trait ObjectTrait: Send + Sync + Display { /// Creates a new object from a byte slice. - fn from_bytes(data: Vec) -> Result where Self: Sized; + fn from_bytes(data: Vec) -> Result + where + Self: Sized; /// Returns the type of the object. fn get_type(&self) -> ObjectType; /// fn get_size(&self) -> usize; -} \ No newline at end of file +} diff --git a/venus/src/internal/object/signature.rs b/venus/src/internal/object/signature.rs new file mode 100644 index 00000000..6dcbc816 --- /dev/null +++ b/venus/src/internal/object/signature.rs @@ -0,0 +1,260 @@ +//! In a Git commit, the author signature contains the name, email address, timestamp, and timezone +//! of the person who authored the commit. This information is stored in a specific format, which +//! consists of the following fields: +//! +//! - Name: The name of the author, encoded as a UTF-8 string. +//! - Email: The email address of the author, encoded as a UTF-8 string. +//! - Timestamp: The timestamp of when the commit was authored, encoded as a decimal number of seconds +//! since the Unix epoch (January 1, 1970, 00:00:00 UTC). +//! - Timezone: The timezone offset of the author's local time from Coordinated Universal Time (UTC), +//! encoded as a string in the format "+HHMM" or "-HHMM". +//! +use std::{fmt::Display, str::FromStr}; + +use bstr::ByteSlice; + +use crate::errors::GitError; + +/// In addition to the author signature, Git also includes a "committer" signature, which indicates +/// who committed the changes to the repository. The committer signature is similar in structure to +/// the author signature, but includes the name, email address, and timestamp of the committer instead. 
+/// This can be useful in situations where multiple people are working on a project and changes are +/// being reviewed and merged by someone other than the original author. +/// +/// In the following example, it's has the only one who authored and committed. +/// ```bash +/// author Eli Ma 1678102132 +0800 +/// committer Quanyi Ma 1678102132 +0800 +/// ``` +/// +/// So, we design a `SignatureType` enum to indicate the signature type. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub enum SignatureType { + Author, + Committer, + Tagger, +} + +impl Display for SignatureType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + SignatureType::Author => write!(f, "author"), + SignatureType::Committer => write!(f, "committer"), + SignatureType::Tagger => write!(f, "tagger"), + } + } +} +impl FromStr for SignatureType { + type Err = GitError; + /// The `from_str` method is used to convert a string to a `SignatureType` enum. + fn from_str(s: &str) -> Result { + match s { + "author" => Ok(SignatureType::Author), + "committer" => Ok(SignatureType::Committer), + "tagger" => Ok(SignatureType::Tagger), + _ => Err(GitError::InvalidSignatureType(s.to_string())), + } + } +} +impl SignatureType { + /// The `from_data` method is used to convert a `Vec` to a `SignatureType` enum. + #[allow(unused)] + pub fn from_data(data: Vec) -> Result { + let s = String::from_utf8(data.to_vec())?; + SignatureType::from_str(s.as_str()) + } + + /// The `to_bytes` method is used to convert a `SignatureType` enum to a `Vec`. 
+ #[allow(unused)] + pub fn to_bytes(&self) -> Vec { + match self { + SignatureType::Author => "author".to_string().into_bytes(), + SignatureType::Committer => "committer".to_string().into_bytes(), + SignatureType::Tagger => "tagger".to_string().into_bytes(), + } + } +} + +#[allow(unused)] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Signature { + pub signature_type: SignatureType, + pub name: String, + pub email: String, + pub timestamp: usize, + pub timezone: String, +} + +impl Display for Signature { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "{} <{}>", self.name, self.email).unwrap(); + writeln!(f, "Date: {}", self.timestamp) + } +} + +impl Signature { + #[allow(unused)] + pub fn new_from_data(data: Vec) -> Result { + // Make a mutable copy of the input data vector. + let mut sign = data; + + // Find the index of the first space byte in the data vector. + let name_start = sign.find_byte(0x20).unwrap(); + + // Parse the author name from the bytes up to the first space byte. + // If the parsing fails, unwrap will panic. + let signature_type = SignatureType::from_data(sign[..name_start].to_vec()).unwrap(); + + let (name, email) = { + let email_start = sign.find_byte(0x3C).unwrap(); + let email_end = sign.find_byte(0x3E).unwrap(); + + unsafe { + ( + sign[name_start + 1..email_start - 1] + .to_str_unchecked() + .to_string(), + sign[email_start + 1..email_end] + .to_str_unchecked() + .to_string(), + ) + } + }; + + // Update the data vector to remove the author and email bytes. + sign = sign[sign.find_byte(0x3E).unwrap() + 2..].to_vec(); + + // Find the index of the second space byte in the updated data vector. + let timestamp_split = sign.find_byte(0x20).unwrap(); + + // Parse the timestamp integer from the bytes up to the second space byte. + // If the parsing fails, unwrap will panic. 
+ let timestamp = unsafe { sign[0..timestamp_split] + .to_str_unchecked() + .parse::() + .unwrap() }; + + // Parse the timezone string from the bytes after the second space byte. + // If the parsing fails, unwrap will panic. + let timezone = unsafe { sign[timestamp_split + 1..].to_str_unchecked().to_string() }; + + // Return a Result object indicating success + Ok(Signature { + signature_type, + name, + email, + timestamp, + timezone, + }) + } + + /// + #[allow(unused)] + pub fn to_data(&self) -> Result, GitError> { + // Create a new empty vector to store the encoded data. + let mut sign = Vec::new(); + + // Append the author name bytes to the data vector, followed by a space byte. + sign.extend_from_slice(&self.signature_type.to_bytes()); + sign.extend_from_slice(&[0x20]); + + // Append the name bytes to the data vector, followed by a space byte. + sign.extend_from_slice(self.name.as_bytes()); + sign.extend_from_slice(&[0x20]); + + // Append the email address bytes to the data vector, enclosed in angle brackets. + sign.extend_from_slice(format!("<{}>", self.email).as_bytes()); + sign.extend_from_slice(&[0x20]); + + // Append the timestamp integer bytes to the data vector, followed by a space byte. + sign.extend_from_slice(self.timestamp.to_string().as_bytes()); + sign.extend_from_slice(&[0x20]); + + // Append the timezone string bytes to the data vector. + sign.extend_from_slice(self.timezone.as_bytes()); + + // Return the data vector as a Result object indicating success. 
+ Ok(sign) + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use crate::internal::object::signature::{Signature, SignatureType}; + + #[test] + fn test_signature_type_from_str() { + assert_eq!( + SignatureType::from_str("author").unwrap(), + SignatureType::Author + ); + + assert_eq!( + SignatureType::from_str("committer").unwrap(), + SignatureType::Committer + ); + } + + #[test] + fn test_signature_type_from_data() { + assert_eq!( + SignatureType::from_data("author".to_string().into_bytes()).unwrap(), + SignatureType::Author + ); + + assert_eq!( + SignatureType::from_data("committer".to_string().into_bytes()).unwrap(), + SignatureType::Committer + ); + } + + #[test] + fn test_signature_type_to_bytes() { + assert_eq!( + SignatureType::Author.to_bytes(), + "author".to_string().into_bytes() + ); + + assert_eq!( + SignatureType::Committer.to_bytes(), + "committer".to_string().into_bytes() + ); + } + + #[test] + fn test_signature_new_from_data() { + let sign = Signature::new_from_data( + "author Quanyi Ma 1678101573 +0800" + .to_string() + .into_bytes(), + ) + .unwrap(); + + assert_eq!(sign.signature_type, SignatureType::Author); + assert_eq!(sign.name, "Quanyi Ma"); + assert_eq!(sign.email, "eli@patch.sh"); + assert_eq!(sign.timestamp, 1678101573); + assert_eq!(sign.timezone, "+0800"); + } + + #[test] + fn test_signature_to_data() { + let sign = Signature::new_from_data( + "committer Quanyi Ma 1678101573 +0800" + .to_string() + .into_bytes(), + ) + .unwrap(); + + let dest = sign.to_data().unwrap(); + + assert_eq!( + dest, + "committer Quanyi Ma 1678101573 +0800" + .to_string() + .into_bytes() + ); + } +} diff --git a/venus/src/internal/object/tree.rs b/venus/src/internal/object/tree.rs new file mode 100644 index 00000000..ffd53013 --- /dev/null +++ b/venus/src/internal/object/tree.rs @@ -0,0 +1,368 @@ +//! In Git, a tree object is used to represent the state of a directory at a specific point in time. +//! 
It stores information about the files and directories within that directory, including their names, +//! permissions, and the IDs of the objects that represent their contents. +//! +//! A tree object can contain other tree objects as well as blob objects, which represent the contents +//! of individual files. The object IDs of these child objects are stored within the tree object itself. +//! +//! When you make a commit in Git, you create a new tree object that represents the state of the +//! repository at that point in time. The parent of the new commit is typically the tree object +//! representing the previous state of the repository. +//! +//! Git uses the tree object to efficiently store and manage the contents of a repository. By +//! representing the contents of a directory as a tree object, Git can quickly determine which files +//! have been added, modified, or deleted between two points in time. This allows Git to perform +//! operations like merging and rebasing more quickly and accurately. +//! +use std::fmt::Display; + +use bstr::ByteSlice; +use colored::Colorize; + +use crate::errors::GitError; +use crate::hash::SHA1; +use crate::internal::object::ObjectTrait; +use crate::internal::object::ObjectType; + +/// In Git, the mode field in a tree object's entry specifies the type of the object represented by +/// that entry. The mode is a three-digit octal number that encodes both the permissions and the +/// type of the object. The first digit specifies the object type, and the remaining two digits +/// specify the file mode or permissions. 
+#[allow(unused)] +#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Debug, Clone, Copy)] +pub enum TreeItemMode { + Blob, + BlobExecutable, + Tree, + Commit, + Link, +} + +impl Display for TreeItemMode { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let _print = match *self { + TreeItemMode::Blob => "blob", + TreeItemMode::BlobExecutable => "blob executable", + TreeItemMode::Tree => "tree", + TreeItemMode::Commit => "commit", + TreeItemMode::Link => "link", + }; + + write!(f, "{}", String::from(_print).blue()) + } +} + +impl TreeItemMode { + /// Convert a 32-bit mode to a TreeItemType + /// + /// |0100000000000000| (040000)| Directory| + /// |1000000110100100| (100644)| Regular non-executable file| + /// |1000000110110100| (100664)| Regular non-executable group-writeable file| + /// |1000000111101101| (100755)| Regular executable file| + /// |1010000000000000| (120000)| Symbolic link| + /// |1110000000000000| (160000)| Gitlink| + /// --- + /// # GitLink + /// Gitlink, also known as a submodule, is a feature in Git that allows you to include a Git + /// repository as a subdirectory within another Git repository. This is useful when you want to + /// incorporate code from another project into your own project, without having to manually copy + /// the code into your repository. + /// + /// When you add a submodule to your Git repository, Git stores a reference to the other + /// repository at a specific commit. This means that your repository will always point to a + /// specific version of the other repository, even if changes are made to the submodule's code + /// in the future. + /// + /// To work with a submodule in Git, you use commands like git submodule add, git submodule + /// update, and git submodule init. These commands allow you to add a submodule to your repository, + /// update it to the latest version, and initialize it for use. 
+ /// + /// Submodules can be a powerful tool for managing dependencies between different projects and + /// components. However, they can also add complexity to your workflow, so it's important to + /// understand how they work and when to use them. + #[allow(unused)] + pub fn tree_item_type_from_bytes(mode: &[u8]) -> Result { + Ok(match mode { + b"40000" => TreeItemMode::Tree, + b"100644" => TreeItemMode::Blob, + b"100755" => TreeItemMode::BlobExecutable, + b"120000" => TreeItemMode::Link, + b"160000" => TreeItemMode::Commit, + b"100664" => TreeItemMode::Blob, + b"100640" => TreeItemMode::Blob, + _ => { + return Err(GitError::InvalidTreeItem( + String::from_utf8(mode.to_vec()).unwrap(), + )); + } + }) + } + + /// 32-bit mode, split into (high to low bits): + /// - 4-bit object type: valid values in binary are 1000 (regular file), 1010 (symbolic link) and 1110 (gitlink) + /// - 3-bit unused + /// - 9-bit unix permission: Only 0755 and 0644 are valid for regular files. Symbolic links and gitlink have value 0 in this field. + #[allow(unused)] + pub fn to_bytes(self) -> &'static [u8] { + match self { + TreeItemMode::Blob => b"100644", + TreeItemMode::BlobExecutable => b"100755", + TreeItemMode::Link => b"120000", + TreeItemMode::Tree => b"40000", + TreeItemMode::Commit => b"160000", + } + } +} + +/// A tree object contains a list of entries, one for each file or directory in the tree. Each entry +/// in the file represents an entry in the tree, and each entry has the following format: +/// +/// ```bash +/// \0 +/// ``` +/// - `` is the mode of the object, represented as a six-digit octal number. The first digit +/// represents the object type (tree, blob, etc.), and the remaining digits represent the file mode or permissions. +/// - `` is the name of the object. +/// - `\0` is a null byte separator. +/// - `` is the ID of the object that represents the contents of the file or +/// directory, represented as a binary SHA-1 hash. 
+/// +/// # Example +/// ```bash +/// 100644 hello-world\0 +/// 040000 data\0 +/// ``` +#[allow(unused)] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct TreeItem { + pub mode: TreeItemMode, + pub id: SHA1, + pub name: String, +} + +impl Display for TreeItem { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{} {} {}", + self.mode, + self.name, + self.id.to_string().blue() + ) + } +} + +impl TreeItem { + /// Create a new TreeItem from a mode, id and name + /// + /// # Example + /// ```rust + /// use git::internal::object::tree::{TreeItem, TreeItemMode}; + /// use git::hash::Hash; + /// + /// // Create a empty TreeItem with the default Hash + /// let default_item = TreeItem::new(TreeItemMode::Blob, Hash::default(), String::new()); + /// + /// // Create a blob TreeItem with a custom Hash, and file name + /// let file_item = TreeItem::new(TreeItemMode::Blob, Hash::new_from_str("1234567890abcdef1234567890abcdef12345678"), String::from("hello.txt")); + /// + /// // Create a tree TreeItem with a custom Hash, and directory name + /// let dir_item = TreeItem::new(TreeItemMode::Tree, Hash::new_from_str("1234567890abcdef1234567890abcdef12345678"), String::from("data")); + /// ``` + #[allow(unused)] + pub fn new(mode: TreeItemMode, id: SHA1, name: String) -> Self { + TreeItem { mode, id, name } + } + + /// Create a new TreeItem from a byte vector, split into a mode, id and name, the TreeItem format is: + /// + /// ```bash + /// \0 + /// ``` + /// + #[allow(unused)] + pub fn new_from_bytes(bytes: &[u8]) -> Result { + let mut parts = bytes.splitn(2, |b| *b == b' '); + let mode = parts.next().unwrap(); + let rest = parts.next().unwrap(); + let mut parts = rest.splitn(2, |b| *b == b'\0'); + let name = parts.next().unwrap(); + let id = parts.next().unwrap(); + + Ok(TreeItem { + mode: TreeItemMode::tree_item_type_from_bytes(mode)?, + id: SHA1::from_bytes(id), + name: String::from_utf8(name.to_vec())?, + }) + } + + /// 
Convert a TreeItem to a byte vector + /// ```rust + /// use git::internal::object::tree::{TreeItem, TreeItemMode}; + /// use git::hash::Hash; + /// + /// let tree_item = TreeItem::new( + /// TreeItemMode::Blob, + /// Hash::new_from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d"), + /// "hello-world".to_string(), + /// ); + /// + // let bytes = tree_item.to_bytes(); + /// ``` + #[allow(unused)] + pub fn to_data(&self) -> Vec { + let mut bytes = Vec::new(); + + bytes.extend_from_slice(self.mode.to_bytes()); + bytes.push(b' '); + bytes.extend_from_slice(self.name.as_bytes()); + bytes.push(b'\0'); + bytes.extend_from_slice(&self.id.to_data()); + + bytes + } +} + +/// A tree object is a Git object that represents a directory. It contains a list of entries, one +/// for each file or directory in the tree. +#[allow(unused)] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Tree { + pub id: SHA1, + pub tree_items: Vec, +} + +impl Display for Tree { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + writeln!(f, "Tree: {}", self.id.to_string().blue())?; + for item in &self.tree_items { + writeln!(f, "{}", item)?; + } + + Ok(()) + } +} + +impl Tree { + #[allow(unused)] + pub fn new_from_tree_items(tree_items: Vec) -> Result { + if tree_items.is_empty() { + return Err(GitError::EmptyTreeItems( + "When export tree object to meta, the items is empty" + .parse() + .unwrap(), + )); + } + + let mut data = Vec::new(); + + for item in &tree_items { + data.extend_from_slice(item.to_data().as_slice()); + } + //TODO : Fixme : deal with the hash value + Ok(Tree { + id: SHA1::new(&data), + tree_items, + }) + } + + #[allow(unused)] + pub fn to_data(&self) -> Result, GitError> { + let mut data: Vec = Vec::new(); + + for item in &self.tree_items { + data.extend_from_slice(item.to_data().as_slice()); + //data.push(b'\0'); + } + + Ok(data) + } +} + +impl ObjectTrait for Tree { + fn get_type(&self) -> ObjectType { + ObjectType::Tree + } + + fn 
from_bytes(data: Vec) -> Result + where + Self: Sized, + { + let mut tree_items = Vec::new(); + let mut i = 0; + while i < data.len() { + let index = data[i..].find_byte(0x00).unwrap(); + let next = i + index + 21; + + tree_items.push(TreeItem::new_from_bytes(&data[i..next]).unwrap()); + i = next + } + + Ok(Tree { + id: SHA1([0u8; 20]), + tree_items, + }) + } + + fn get_size(&self) -> usize { + todo!() + } +} + +#[cfg(test)] +mod tests { + + use std::str::FromStr; + + use crate::hash::SHA1; + use crate::internal::object::tree::{TreeItem, TreeItemMode}; + + #[test] + fn test_tree_item_new() { + let tree_item = TreeItem::new( + TreeItemMode::Blob, + SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(), + "hello-world".to_string(), + ); + + assert_eq!(tree_item.mode, TreeItemMode::Blob); + assert_eq!( + tree_item.id.to_plain_str(), + "8ab686eafeb1f44702738c8b0f24f2567c36da6d" + ); + } + + #[test] + fn test_tree_item_to_bytes() { + let tree_item = TreeItem::new( + TreeItemMode::Blob, + SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(), + "hello-world".to_string(), + ); + + let bytes = tree_item.to_data(); + assert_eq!( + bytes, + vec![ + 49, 48, 48, 54, 52, 52, 32, 104, 101, 108, 108, 111, 45, 119, 111, 114, 108, 100, + 0, 138, 182, 134, 234, 254, 177, 244, 71, 2, 115, 140, 139, 15, 36, 242, 86, 124, + 54, 218, 109 + ] + ); + } + + #[test] + fn test_tree_item_from_bytes() { + let item = TreeItem::new( + TreeItemMode::Blob, + SHA1::from_str("8ab686eafeb1f44702738c8b0f24f2567c36da6d").unwrap(), + "hello-world".to_string(), + ); + + let bytes = item.to_data(); + let tree_item = TreeItem::new_from_bytes(bytes.as_slice()).unwrap(); + + assert_eq!(tree_item.mode, TreeItemMode::Blob); + assert_eq!(tree_item.id.to_plain_str(), item.id.to_plain_str()); + } +} diff --git a/mercury/src/internal/object/types.rs b/venus/src/internal/object/types.rs similarity index 100% rename from mercury/src/internal/object/types.rs rename to 
// ---- file: venus/src/internal/object/utils.rs (new file) ----
// Helpers for decoding the size and type headers that precede Git object data.

use std::io::{self, Read};

/// Number of bits that hold the object type in the first header byte.
const TYPE_BITS: u8 = 3;
/// Number of payload bits carried by every byte of a variable-length integer.
const VAR_INT_ENCODING_BITS: u8 = 7;
/// Number of size bits left in the first header byte once the type bits are taken out.
const TYPE_BYTE_SIZE_BITS: u8 = VAR_INT_ENCODING_BITS - TYPE_BITS;
/// Top bit of a byte; when set, another byte of the integer follows.
const VAR_INT_CONTINUE_FLAG: u8 = 1 << VAR_INT_ENCODING_BITS;

/// Parses a byte slice into a `usize` representing the size of a Git object.
///
/// This function is intended to be used for converting the bytes, which represent the size portion
/// in a Git object, back into a `usize`. This size is typically compared with the actual length of
/// the object's data part to ensure data integrity.
///
/// # Parameters
/// * `bytes`: A byte slice (`&[u8]`) holding the size as ASCII decimal digits.
///
/// # Returns
/// * `Ok(usize)`: On successful parsing, returns the size as a `usize`.
/// * `Err(Box<dyn std::error::Error>)`: On failure, returns the underlying error in a `Box`.
///
/// # Errors
/// This function reports two kinds of errors:
/// 1. `Utf8Error`: If the byte slice is not valid UTF-8.
/// 2. `ParseIntError`: If the text does not represent a valid `usize` value.
pub fn parse_size_from_bytes(bytes: &[u8]) -> Result<usize, Box<dyn std::error::Error>> {
    let size_str = std::str::from_utf8(bytes)?;
    Ok(size_str.parse::<usize>()?)
}

/// Keeps the lowest `bits` bits of `value` and clears the rest.
fn keep_bits(value: usize, bits: u8) -> usize {
    value & ((1 << bits) - 1)
}

/// Reads an entry header from `stream` and splits it into `(object_type, size)`.
///
/// # Errors
/// Propagates any error from [`read_size_encoding`].
pub fn read_type_and_size<R: Read>(stream: &mut R) -> io::Result<(u8, usize)> {
    // Object type and uncompressed pack data size
    // are stored in a "size-encoding" variable-length integer.
    // Bits 4 through 6 store the type and the remaining bits store the size.
    let value = read_size_encoding(stream)?;
    let object_type = keep_bits(value >> TYPE_BYTE_SIZE_BITS, TYPE_BITS) as u8;
    // Stitch the size back together: the low bits below the type, plus everything above it.
    let size = keep_bits(value, TYPE_BYTE_SIZE_BITS)
        | (value >> VAR_INT_ENCODING_BITS << TYPE_BYTE_SIZE_BITS);

    Ok((object_type, size))
}

/// Reads a little-endian variable-length integer: 7 payload bits per byte, with the top
/// bit of each byte signalling that another byte follows.
///
/// # Errors
/// * Any I/O error from the underlying reader, including `UnexpectedEof` when the stream
///   ends while the continuation bit is still set.
/// * `InvalidData` when the encoded value does not fit in a `usize`.
pub fn read_size_encoding<R: Read>(stream: &mut R) -> io::Result<usize> {
    let mut value: usize = 0;
    let mut shift: u32 = 0;

    loop {
        // Propagate read failures; a truncated stream must not abort the process.
        let (byte_value, more_bytes) = read_var_int_byte(stream)?;

        // Reject encodings whose bits would be shifted out of a `usize`, instead of
        // overflowing the shift counter or silently dropping high bits.
        let chunk = usize::from(byte_value);
        let shifted = chunk
            .checked_shl(shift)
            .filter(|shifted| shifted >> shift == chunk)
            .ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    "size encoding does not fit in usize",
                )
            })?;

        value |= shifted;
        if !more_bytes {
            return Ok(value);
        }

        shift += u32::from(VAR_INT_ENCODING_BITS);
    }
}

/// Reads one byte and returns its low 7 bits together with a flag telling whether the
/// continuation (top) bit was set.
pub fn read_var_int_byte<R: Read>(stream: &mut R) -> io::Result<(u8, bool)> {
    let [byte] = read_bytes(stream)?;
    let value = byte & !VAR_INT_CONTINUE_FLAG;
    let more_bytes = byte & VAR_INT_CONTINUE_FLAG != 0;

    Ok((value, more_bytes))
}

/// Reads exactly `N` bytes from the reader.
///
/// # Errors
/// Returns the error from `Read::read_exact`, e.g. `UnexpectedEof` on a short stream.
#[inline]
pub fn read_bytes<R: Read, const N: usize>(stream: &mut R) -> io::Result<[u8; N]> {
    let mut bytes = [0; N];
    stream.read_exact(&mut bytes)?;

    Ok(bytes)
}

#[cfg(test)]
mod tests {
    use super::parse_size_from_bytes;

    #[test]
    fn test_parse_size_from_bytes() -> Result<(), Box<dyn std::error::Error>> {
        let size: usize = 12345;
        let size_bytes = size.to_string().as_bytes().to_vec();

        let parsed_size = parse_size_from_bytes(&size_bytes)?;

        assert_eq!(size, parsed_size);
        Ok(())
    }
}
a/venus/src/internal/pack/entry.rs b/venus/src/internal/pack/entry.rs new file mode 100644 index 00000000..944e3be6 --- /dev/null +++ b/venus/src/internal/pack/entry.rs @@ -0,0 +1,16 @@ +use serde::{Deserialize, Serialize}; + +use crate::hash::SHA1; +use crate::internal::pack::header::EntryHeader; + +/// +/// One Pre loading Git object in memory +/// +#[derive(Clone, Serialize, Deserialize, Default)] +pub struct Entry { + pub header: EntryHeader, + pub offset: usize, + pub data: Vec, + pub hash: Option, +} + diff --git a/venus/src/internal/pack/header.rs b/venus/src/internal/pack/header.rs new file mode 100644 index 00000000..0aa60b78 --- /dev/null +++ b/venus/src/internal/pack/header.rs @@ -0,0 +1,80 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::hash::SHA1; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub enum EntryHeader { + #[default] + Commit, + Tree, + Blob, + Tag, + #[allow(unused)] + RefDelta { + base_id: SHA1, + }, + #[allow(unused)] + OfsDelta { + base_distance: usize, + }, +} + +const COMMIT_OBJECT_TYPE: &[u8] = b"commit"; +const TREE_OBJECT_TYPE: &[u8] = b"tree"; +const BLOB_OBJECT_TYPE: &[u8] = b"blob"; +const TAG_OBJECT_TYPE: &[u8] = b"tag"; + +impl EntryHeader { + pub fn from_string(t: &str) -> Self { + match t { + "commit" => EntryHeader::Commit, + "tree" => EntryHeader::Tree, + "tag" => EntryHeader::Tag, + "blob" => EntryHeader::Blob, + _ => panic!("cat to not base obj"), + } + } + pub fn is_base(&self) -> bool { + match self { + EntryHeader::Commit => true, + EntryHeader::Tree => true, + EntryHeader::Blob => true, + EntryHeader::Tag => true, + EntryHeader::RefDelta { base_id: _ } => false, + EntryHeader::OfsDelta { base_distance: _ } => false, + } + } + pub fn to_bytes(&self) -> &[u8] { + match self { + EntryHeader::Commit => COMMIT_OBJECT_TYPE, + EntryHeader::Tree => TREE_OBJECT_TYPE, + EntryHeader::Blob => BLOB_OBJECT_TYPE, + EntryHeader::Tag => TAG_OBJECT_TYPE, + _ => panic!("can put compute the 
delta hash value"), + } + } + pub fn to_number(&self) -> u8 { + match self { + EntryHeader::Commit => 1, + EntryHeader::Tree => 2, + EntryHeader::Blob => 3, + EntryHeader::Tag => 4, + EntryHeader::RefDelta { base_id: _ } => 7, + EntryHeader::OfsDelta { base_distance: _ } => 6, + } + } +} +impl Display for EntryHeader { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + EntryHeader::Commit => write!(f, "COMMIT"), + EntryHeader::Tree => write!(f, "Tree"), + EntryHeader::Blob => write!(f, "Blob"), + EntryHeader::Tag => write!(f, "Tag"), + EntryHeader::RefDelta { base_id } => write!(f, "Ref Delta :{}", base_id), + EntryHeader::OfsDelta { base_distance } => write!(f, "Ofs Delta :{}", base_distance), + } + } +} diff --git a/venus/src/internal/pack/mod.rs b/venus/src/internal/pack/mod.rs new file mode 100644 index 00000000..47d7c931 --- /dev/null +++ b/venus/src/internal/pack/mod.rs @@ -0,0 +1,3 @@ +pub mod entry; +pub mod header; +pub mod reference; diff --git a/venus/src/internal/pack/reference.rs b/venus/src/internal/pack/reference.rs new file mode 100644 index 00000000..83128d22 --- /dev/null +++ b/venus/src/internal/pack/reference.rs @@ -0,0 +1,31 @@ +use serde::{Deserialize, Serialize}; + +use db_entity::db_enums::RefType; + +/// +/// Represent the references(all branches and tags) in protocol transfer +/// +#[derive(Clone, Serialize, Deserialize, Default)] +pub struct Reference { + pub ref_name: String, + pub ref_hash: String, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum CommandType { + Create, + Delete, + Update, +} + +/// Reference Update Request +#[derive(Debug, Clone, PartialEq)] +pub struct RefCommand { + pub ref_name: String, + pub old_id: String, + pub new_id: String, + pub status: String, + pub error_msg: String, + pub command_type: CommandType, + pub ref_type: RefType, +} diff --git a/venus/src/internal/repo.rs b/venus/src/internal/repo.rs new file mode 100644 index 00000000..a3a88b32 --- /dev/null +++ 
b/venus/src/internal/repo.rs @@ -0,0 +1,7 @@ +/// The `repo` struct maintains the relationship between `repo_id` and `repo_path`. +#[derive(PartialEq, Eq, Debug, Clone)] +pub struct Repo { + pub repo_id: i64, + pub repo_path: String, + pub repo_name: String, +} diff --git a/venus/src/lib.rs b/venus/src/lib.rs new file mode 100644 index 00000000..62f79d9a --- /dev/null +++ b/venus/src/lib.rs @@ -0,0 +1,4 @@ +pub mod db_convert; +pub mod errors; +pub mod hash; +pub mod internal;