Logo fixes #92

Merged 3 commits on Jun 1, 2024
7 changes: 4 additions & 3 deletions desktop/Earthfile
@@ -2,17 +2,18 @@ VERSION --cache-persist-option --global-cache 0.7
PROJECT applied-knowledge-systems/terraphim-project
FROM ghcr.io/terraphim/terraphim_builder_native:latest

WORKDIR frontend
WORKDIR /code/desktop

deps:
# COPY package.json tsconfig.json yarn.lock vite.config.ts tsconfig.node.json index.html ./
COPY --keep-ts . .
# COPY --dir src src
# COPY --dir public public
RUN oro apply -q || true
RUN pkgx +yarnpkg.com yarn
RUN /code/desktop/scripts/yarn_and_build.sh

build:
FROM +deps
RUN pkgx +yarnpkg.com yarn run build
# RUN pkgx +yarnpkg.com yarn run build

SAVE ARTIFACT dist /dist AS LOCAL dist
Binary file added desktop/public/logo_bw_square.png
2 changes: 1 addition & 1 deletion desktop/src/App.svelte
@@ -62,7 +62,7 @@
<Route path="/fetch/*"><FetchTabs /></Route>
</main>

<footer on:mouseover={toggleVissible}>
<footer on:mouseover={toggleVissible} on:focus={toggleVissible}>
<div class={visible}>
<Route path="/">
<nav class="navbar">
34 changes: 33 additions & 1 deletion docs/book.toml
@@ -7,6 +7,7 @@ title = "Terraphim AI documentation"

[output.html]
additional-js = ["mermaid.min.js", "mermaid-init.js"]
mathjax-support = true
# [output.compress]
# subtitle = "How to use and contribute to Terraphim AI assistant"
# highlight = "no-node"
@@ -18,6 +19,37 @@ additional-js = ["mermaid.min.js", "mermaid-init.js"]
format = "pdf"

[preprocessor]

[preprocessor.alerts]
[preprocessor.mermaid]
command = "mdbook-mermaid"

# [output.pandoc]
# hosted-html = "https://docs.terraphim.ai/" # URL of a HTML version of the book

# [output.pandoc.code]
# # Display hidden lines in code blocks (e.g., lines in Rust blocks prefixed by '#').
# # See https://rust-lang.github.io/mdBook/format/mdbook.html?highlight=hidden#hiding-code-lines
# show-hidden-lines = false

# [output.pandoc.profile.pdf]
# # options to pass to Pandoc (see https://pandoc.org/MANUAL.html#defaults-files)
# output-file = "output.pdf" # output file (within the profile's build directory)
# to = "latex" # output format

# # PDF-specific settings
# pdf-engine = "pdflatex" # engine to use to produce PDF output

# # `mdbook-pandoc` overrides Pandoc's defaults for the following options to better support mdBooks
# file-scope = true # parse each file individually before combining
# number-sections = true # number sections headings
# standalone = true # produce output with an appropriate header and footer
# table-of-contents = true # include an automatically generated table of contents

# [output.pandoc.profile.docx]
# # options to pass to Pandoc (see https://pandoc.org/MANUAL.html#defaults-files)
# output-file = "output.docx" # output file (within the profile's build directory)
# # `mdbook-pandoc` overrides Pandoc's defaults for the following options to better support mdBooks
# file-scope = true # parse each file individually before combining
# number-sections = true # number sections headings
# standalone = true # produce output with an appropriate header and footer
# table-of-contents = true # include an automatically generated table of contents
15 changes: 14 additions & 1 deletion docs/src/Docs.md
@@ -1,4 +1,17 @@
# Documentation generation
This repository uses mdbook-typst to generate a nice-looking PDF.


> [!NOTE]
> Highlights information that users should take into account, even when skimming.

> [!TIP]
> Optional information to help a user be more successful.

> [!IMPORTANT]
> Crucial information necessary for users to succeed.

> [!WARNING]
> Critical content demanding immediate user attention due to potential risks.

> [!CAUTION]
> Negative potential consequences of an action.
25 changes: 23 additions & 2 deletions docs/src/Introduction.md
@@ -3,20 +3,41 @@
Terraphim is a privacy-first AI assistant that works for you under your complete control. It starts as a local search engine that can be configured to search different types of content, such as StackOverflow, GitHub, and a local filesystem with pre-defined folders, including Markdown files. We use modern AI/ML algorithms, data fusion, and distributed communication techniques to run AI assistants on the user's own hardware, including unused mobile devices.

## Methodological aspects

To personalise search relevance, Terraphim uses a knowledge graph, either local (in Obsidian or Logseq) or shared (in Atomic Server). We follow a methodology for creating and updating such a knowledge graph. First, each Terraphim role has its own separate knowledge graph that contains the relevant concepts with all their synonyms. Usually, at the start, a couple of key industry standards, reference process models, or handbooks are identified, and then concepts and synonyms are extracted from them using named entity recognition and expert opinions.

Terraphim can also import curated industry taxonomies and produce a knowledge graph from them. The typical structure of a knowledge graph follows the SIPOC pattern: concepts at the input and output of a process are identified, and the process and its activity names are listed. After that, for each additional piece of data (for example, a blog post, article, book, or video recording transcript), Terraphim identifies either role concepts or synonyms from the knowledge graph, extending the applicability of the knowledge graph that represents the domain relevant to the selected Terraphim role. Using such textbook-quality datasets for semantic search enables the delivery of very accurate and precise search results.

This methodology has been validated within the INCOSE community for the systems engineering handbook v.4 and the systems engineering digital process model v.1, and was recognised as a valid low-effort substitute for formal model-based systems engineering, especially for brownfield systems engineering, reverse engineering, and professional certified systems engineering certification, as well as for engineering education. Connecting a Terraphim role to a widely accepted industry standard helps to activate the accumulated wisdom of generations and augments communication by referring to common, recognised concepts and terms used in the standard. It also helps to formulate and reuse lessons learned, pass audits, follow organisational guidelines, and match communication to the documentation and standard terms used in professional software tools, which often adopt the standard terminology.


## Scientific innovation and history


At the core of Terraphim are unique knowledge graph embeddings: Terraphim AI is designed to be deterministically precise, with 100% precision and recall. In an engineering context, the more complex the system being built, the more important it is to be able to specify not only "the battery" but to differentiate between a cr2365 battery and an AA battery. Hence, Terraphim embeddings maintain the sequence of words in a sentence. The Terraphim graph embedding maintains the position of each term in a sentence and does not need traditional training techniques like attention.

At the same time, by allowing users to specify the required synonyms manually and then rebuild the graph embeddings for a role within 20 milliseconds, Terraphim AI can match terms in different languages to the same concept, so there is no need to run language detection in the pipeline. By giving users full and precise control over term matching within the selected context, there is also no need for a stop-words dictionary: "The Pattern" is the name of the project that was Terraphim AI's predecessor. "The" is normally considered a stop-word and is filtered out before tokenisation, so "The Pattern" would not match the project's name using traditional techniques. The Pattern used more traditional techniques for graph embeddings, BERT QA, and T5 summarisation.
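A minimal sketch of the stop-word-free, position-preserving matching described above (the data and function names here are hypothetical illustrations, not Terraphim internals): because nothing is filtered before tokenisation, a phrase like "The Pattern" matches as a whole, and each hit records the word position where it occurred.

```rust
use std::collections::HashMap;

/// Map every synonym (lowercased) to its canonical concept.
/// Note that "the" is kept: there is no stop-word filtering.
fn build_thesaurus() -> HashMap<&'static str, &'static str> {
    let mut t = HashMap::new();
    t.insert("the pattern", "terraphim-predecessor");
    t.insert("aa battery", "battery-aa");
    t.insert("battery cr2365", "battery-cr2365");
    t
}

/// Scan the text word by word, trying the longest phrase first, and record
/// the word position of each matched concept so term order is preserved.
fn match_concepts(
    text: &str,
    thesaurus: &HashMap<&'static str, &'static str>,
) -> Vec<(usize, String)> {
    let words: Vec<String> = text
        .to_lowercase()
        .split_whitespace()
        .map(|w| w.trim_matches(|c: char| !c.is_alphanumeric()).to_string())
        .collect();
    let mut hits = Vec::new();
    for start in 0..words.len() {
        let max_len = 2.min(words.len() - start);
        for len in (1..=max_len).rev() {
            let phrase = words[start..start + len].join(" ");
            if let Some(concept) = thesaurus.get(phrase.as_str()) {
                hits.push((start, concept.to_string()));
                break;
            }
        }
    }
    hits
}

fn main() {
    let hits = match_concepts("We built The Pattern using an AA battery", &build_thesaurus());
    println!("{hits:?}");
}
```

A traditional pipeline would drop "The" before tokenisation and miss the project name entirely; here the phrase survives because matching is driven by the role's thesaurus, not a generic stop-word list.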

To highlight the learnings and the major groundbreaking innovation behind Terraphim AI: "The Pattern" project was a Platinum winner of the Redis Hackathon challenge (https://redis.com/blog/build-on-redis-hackathon-winners/) and at the time outperformed the Nvidia ML pipeline for BERT QA inference on CPU, and most likely in training. The prize demonstrated that external experts accepted the approach as innovative, and the results were presented and discussed in a public lecture at Green Templeton College, Oxford University.

The Pattern grew out of participation in two Kaggle data science competitions, where the original ML pipeline would not finish processing the data in 6 days; The Pattern processed the data for training in 6 hours, with under 2 millisecond inference. By rethinking Terraphim AI from the ground up, we can achieve pipeline processing times in hundreds of milliseconds and query (knowledge graph-based) inference in 5 to 10 nanoseconds. Our approach enables far faster and more practical AI applications for us and our users, and we will engage the scientific community via an existing arrangement with the Alan Turing Institute UK NLP group. All projects are open-sourced, and learnings are shared on https://terraphim.ai, https://reference-architecture.ai, and https://thepattern.digital
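To illustrate why per-role rebuilds can be this fast, here is a toy sketch (data and names are hypothetical): if the role's "embedding" is a flat synonym-to-concept index, rebuilding it is a single linear pass and a query is one hash lookup, with no training step at all.

```rust
use std::collections::HashMap;
use std::time::Instant;

/// Rebuild a role's synonym -> concept index in one linear pass.
/// Synonyms in different languages can map to the same concept,
/// so no language detection is needed at query time.
fn rebuild_role_index(synonyms: &[(&str, &str)]) -> HashMap<String, String> {
    synonyms
        .iter()
        .map(|(syn, concept)| (syn.to_lowercase(), concept.to_string()))
        .collect()
}

fn main() {
    let synonyms = [
        ("knowledge graph", "kg"),
        ("semantic network", "kg"),
        ("graphe de connaissances", "kg"), // French synonym, same concept
    ];
    let t0 = Instant::now();
    let index = rebuild_role_index(&synonyms);
    println!("rebuilt {} entries in {:?}", index.len(), t0.elapsed());
    println!("lookup: {:?}", index.get("semantic network"));
}
```

This is of course far simpler than a real graph embedding, but it shows the shape of the trade-off: since nothing is learned, a rebuild is bounded by the size of the thesaurus rather than by training time.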


## Deployment innovation


Terraphim private cloud is designed with minimal sharing of infrastructure between tenants, staying close to the hardware for high performance, effective use of resources, and encryption in transit.
Each user has their own dedicated virtual machine based on AWS Firecracker (microVM). Firecracker is an open-source lightweight VM by AWS, the same technology behind the AWS Lambda and AWS Fargate services. Using Firecracker VMs instead of Docker or Kubernetes allows us to:

1) Create a new environment for a user in under 1 second (~600-800 ms), including copying a new root partition for the user

2) Provide high performance and effective use of resources, with response times in milliseconds and memory usage measured in megabytes, even for machine learning/AI workloads

3) Maintain security and encryption in transit: each user has their own valid TLS certificate from Cloudflare to certify the HTTPS entry point and encrypt traffic all the way to the Firecracker VM. Traffic flows via an HTTPS server/load balancer (Caddy), which directs it to the corresponding tun/tap interface (kernel level, no Docker/containerd), with the VM attached directly to that interface. In our previous experiments with ML model training for The Pattern, we noticed substantial performance degradation in network flow when using Kubernetes or Docker networks compared to standard Linux kernel-based networking. Terraphim private cloud is designed specifically for high-throughput requirements, as close to the network as possible

4) Embed additional services, such as Redis for caching, into users' VMs with high performance (no need to spin up additional containers, unlike with Docker), while the user's data never leaves the VM boundary

5) Enable additional capabilities that are designed but not switched on by default, for example the use of a ZeroTier VPN network to optimise routing between nodes and provide an additional security and encryption layer

In summary, Terraphim private cloud is an evaluation environment for Terraphim AI users that allows them to test Terraphim AI capabilities while maintaining privacy and ownership of their data. It is a substantial technological innovation in favour of customers and against current tech.
1 change: 1 addition & 0 deletions docs/src/SUMMARY.md
@@ -6,3 +6,4 @@
- [Design Decisions](./DesignDecisions.md)
- [Use cases](./Use-cases.md)
- [Code of Conduct](./CODE_OF_CONDUCT.md)
- [Mdbook Test](./Docs.md)
31 changes: 21 additions & 10 deletions docs/src/Use-cases.md
@@ -3,14 +3,25 @@
## Search your local markdown files, build your own personal knowledge graph, then publish your knowledge to the web, then reuse

### End to End flow

The current focus is on perfecting overall end-to-end human usable flow:
- an engineer or expert takes notes in a Logseq knowledge graph

- the notes are used to create a thesaurus for Terraphim (using Terraphim desktop; feature done, alpha testing)

- the thesaurus is used, per role, to create a Terraphim graph, which is used for ranking search results (feature done, alpha testing)

- Terraphim desktop is used to search over local markdown files, using the Terraphim graph for ranking (feature done, alpha testing)

- Terraphim desktop search automatically populates Atomic Server with search results (the first approach didn't work and required changes in the Atomic Rust client; the Atomic server side is updated with the new client fetch, and the Terraphim feature for automatic cache population is WIP: https://github.com/terraphim/terraphim-ai/issues/12)

- the user works with search results, forming blogs or articles in Atomic Server, using Atomic as a standard CMS (all features done)

- final articles are published via different domains using Atomic Server + SvelteKit

- current websites: [Systems Engineering](https://systems.tf)

- planned set of websites on different topics: learning Rust, knowledge graphs, search engines, Metacortex Engineering (cross-discipline collaboration)

- articles published (or drafted) in Atomic Server are used as input for Terraphim (both desktop and cloud) as the haystack for a role, instead of or alongside markdown files or any other sources. Blocked on https://github.com/atomicdata-dev/atomic-server/issues/778
7 changes: 7 additions & 0 deletions scripts/yarn_and_build.sh
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Install and build the front end inside the bionic image
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash
bash -c "source $HOME/.nvm/nvm.sh && nvm install 16.15.1"
bash -c "source $HOME/.nvm/nvm.sh && npm install -g yarn"
bash -c "source $HOME/.nvm/nvm.sh && cd /code/desktop && yarn && yarn build"
cp -Rv /code/desktop/dist /code/terraphim_server/
2 changes: 1 addition & 1 deletion terraphim_server/build.rs
@@ -101,7 +101,7 @@ fn should_build(dirs: &Dirs) -> bool {

/// Runs JS package manager to install packages and build the JS bundle
fn build_js(dirs: &Dirs) {
let pkg_manager = "yarn";
let pkg_manager = "./scripts/yarn_and_build.sh";

p!("install js packages...");

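The one-line change above swaps the direct `yarn` invocation in `build_js` for the wrapper script, keeping the Node/yarn setup in one place. A minimal sketch (the helper name is hypothetical; only the script path follows the diff) of shelling out from a Rust build script:

```rust
use std::process::Command;

/// Run an external build command and report success, mirroring how a
/// build.rs might invoke ./scripts/yarn_and_build.sh instead of `yarn`.
fn run_build_command(cmd: &str) -> bool {
    Command::new(cmd)
        .status()
        .map(|s| s.success())
        .unwrap_or(false) // command missing or not executable
}

fn main() {
    // `true` stands in for ./scripts/yarn_and_build.sh so the sketch runs anywhere;
    // the real call would be run_build_command("./scripts/yarn_and_build.sh").
    println!("build ok: {}", run_build_command("true"));
}
```

In a real build.rs, a failed status would typically be turned into a `panic!` so Cargo aborts the build rather than silently shipping a stale bundle.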
Binary file added terraphim_server/dist/180x180.png
Binary file added terraphim_server/dist/32x32.png