Skip to content

Commit

Permalink
Merge pull request #3 from lindera/workflows
Browse files Browse the repository at this point in the history
Fix workflows
  • Loading branch information
mosuka authored Oct 27, 2024
2 parents b61bb5a + 54eba16 commit bd35c3e
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 72 deletions.
31 changes: 0 additions & 31 deletions .github/workflows/ci.yml

This file was deleted.

4 changes: 3 additions & 1 deletion .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ jobs:
- runner: windows-latest
target: x86_64-pc-windows-msvc
toolchain: [stable, beta, nightly]
features: ["ipadic", "ko-dic", "cc-cedict"]
features: ["ipadic,ko-dic,cc-cedict"]
runs-on: ${{ matrix.platform.runner }}
env:
LINDERA_CONFIG_PATH: "./resources/lindera.json"
steps:
- name: Run checkout
uses: actions/checkout@v4
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@ jobs:
- runner: windows-latest
target: x86_64-pc-windows-msvc
toolchain: [stable]
features: ["ipadic", "ko-dic", "cc-cedict"]
features: ["ipadic,ko-dic,cc-cedict"]
runs-on: ${{ matrix.platform.runner }}
env:
LINDERA_CONFIG_PATH: "./resources/lindera.json"
steps:
- name: Run checkout
uses: actions/checkout@v4
Expand Down
113 changes: 113 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Release workflow.
#
# 1. `build` compiles the library once per platform/dictionary-feature
#    combination, zips the result and attaches it to the GitHub release for
#    the pushed tag.
# 2. `publish-crates` publishes the lindera-sqlite crate to crates.io unless
#    that exact version is already listed there.
name: Release

on:
  # NOTE(review): on a manual run `github.ref_name` is the branch name, not a
  # version tag, so the release/tag names below would use the branch name.
  # Confirm that manual dispatch is really intended for this workflow.
  workflow_dispatch:

  push:
    tags:
      - "v*.*.*"

# softprops/action-gh-release creates the release and uploads assets, which
# needs write access to the repository contents; `read` makes that step fail.
permissions:
  contents: write

jobs:
  build:
    name: Build
    strategy:
      matrix:
        # `archive` is the archive suffix, `extension` the suffix of the built
        # library that gets packed into the archive.
        # NOTE(review): the packed file is `lindera<extension>`; confirm this
        # matches the file name cargo actually produces for this crate.
        platform:
          - runner: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            archive: ".zip"
            extension: ".a"
          - runner: macOS-latest
            target: x86_64-apple-darwin
            archive: ".zip"
            extension: ".a"
          - runner: macOS-latest
            target: aarch64-apple-darwin
            archive: ".zip"
            extension: ".a"
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            archive: ".zip"
            extension: ".lib"
        toolchain: [stable]
        # One build (and one artifact) per dictionary feature.
        features: ["ipadic", "ko-dic", "cc-cedict"]
    runs-on: ${{ matrix.platform.runner }}
    env:
      LINDERA_CONFIG_PATH: "./resources/lindera.json"
    steps:
      - name: Run checkout
        uses: actions/checkout@v4

      - name: Install toolchain
        uses: dtolnay/rust-toolchain@v1
        with:
          toolchain: ${{ matrix.toolchain }}
          target: ${{ matrix.platform.target }}
          components: rustfmt, clippy

      # The condition must test the runner label; comparing the target triple
      # against 'windows-latest' never matches, so msbuild was never set up.
      - name: Add msbuild to PATH
        if: matrix.platform.runner == 'windows-latest'
        uses: microsoft/setup-msbuild@v2

      # With `--target`, cargo writes to <target-dir>/<target>/release, so the
      # target dir is keyed by feature to yield
      # target/<features>/<target>/release, the path the archive steps read.
      - name: Compile
        run: >-
          cargo build --release
          --features=${{ matrix.features }},extension
          --target=${{ matrix.platform.target }}
          --target-dir=target/${{ matrix.features }}

      - name: Create artifact for Linux
        if: matrix.platform.runner == 'ubuntu-latest'
        run: >-
          zip --junk-paths
          lindera-${{ matrix.features }}-${{ matrix.platform.target }}-${{ github.ref_name }}${{ matrix.platform.archive }}
          target/${{ matrix.features }}/${{ matrix.platform.target }}/release/lindera${{ matrix.platform.extension }}

      # The archive name must be lindera-<features>-<target>-<ref>, the same
      # pattern the upload step looks for.
      - name: Create artifact for Windows
        if: matrix.platform.runner == 'windows-latest'
        run: >-
          powershell Compress-Archive
          -DestinationPath lindera-${{ matrix.features }}-${{ matrix.platform.target }}-${{ github.ref_name }}${{ matrix.platform.archive }}
          -Path target/${{ matrix.features }}/${{ matrix.platform.target }}/release/lindera${{ matrix.platform.extension }}

      # Label spelled exactly as in the matrix for consistency.
      - name: Create artifact for OSX
        if: matrix.platform.runner == 'macOS-latest'
        run: >-
          zip --junk-paths
          lindera-${{ matrix.features }}-${{ matrix.platform.target }}-${{ github.ref_name }}${{ matrix.platform.archive }}
          target/${{ matrix.features }}/${{ matrix.platform.target }}/release/lindera${{ matrix.platform.extension }}

      - name: Upload artifact
        uses: softprops/action-gh-release@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          files: lindera-${{ matrix.features }}-${{ matrix.platform.target }}-${{ github.ref_name }}${{ matrix.platform.archive }}
          name: Release ${{ github.ref_name }}
          tag_name: ${{ github.ref_name }}
          draft: false
          prerelease: false
          generate_release_notes: true

  publish-crates:
    name: Publish crate
    # Publish only after every build in the matrix has succeeded.
    needs: [build]
    strategy:
      matrix:
        platform:
          - runner: ubuntu-latest
            target: x86_64-unknown-linux-gnu
        toolchain: [stable]
    runs-on: ${{ matrix.platform.runner }}
    env:
      LINDERA_CONFIG_PATH: "./resources/lindera.json"
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install toolchain
        uses: dtolnay/rust-toolchain@v1
        with:
          toolchain: ${{ matrix.toolchain }}
          target: ${{ matrix.platform.target }}
          components: rustfmt, clippy

      - name: Publish lindera-sqlite
        env:
          # cargo reads the crates.io token from this variable, which keeps
          # the secret off the command line.
          CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_TOKEN }}
        run: |
          # Version declared in the local manifest.
          LINDERA_SQLITE_VERSION=$(cargo metadata --no-deps --format-version=1 | jq -r '.packages[] | select(.name=="lindera-sqlite") | .version')
          # Versions already on crates.io, one per line (empty if the API
          # response carries no `versions` array).
          LINDERA_SQLITE_VERSIONS=$(curl -s -XGET https://crates.io/api/v1/crates/lindera-sqlite | jq -r 'select(.versions != null) | .versions[].num')
          # Compare whole lines as fixed strings: a regex/substring match
          # would treat 0.1.1 as published once 0.1.10 exists.
          if printf '%s\n' "${LINDERA_SQLITE_VERSIONS}" | grep -Fxq -- "${LINDERA_SQLITE_VERSION}"; then
            echo "lindera-sqlite ${LINDERA_SQLITE_VERSION} has already published"
          else
            cargo publish
          fi
2 changes: 1 addition & 1 deletion src/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ pub extern "C" fn fts5_create_lindera_tokenizer(
unsafe {
*fts5_tokenizer = Box::into_raw(tokenizer);
}

SQLITE_OK
}

Expand Down
41 changes: 3 additions & 38 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use std::io::BufReader;

use dotenv::dotenv;
use libc::{c_char, c_int, c_uchar, c_void};
// use unicode_normalization::UnicodeNormalization;

use lindera::tokenizer::{Tokenizer, TokenizerConfig};

Expand Down Expand Up @@ -69,17 +68,9 @@ fn lindera_fts5_tokenize_internal(
// wouldn't accessible.
let input = core::str::from_utf8(slice).map_err(|_| SQLITE_OK)?;

// let mut normalized = String::with_capacity(1024);

match unsafe { (*tokenizer).tokenizer.tokenize(input) } {
Ok(tokens) => {
for token in tokens {
// normalize_into(token.text.as_ref(), &mut normalized);
// let a = normalized.as_bytes().as_ptr();
// let b = token.text.as_bytes().as_ptr();
// let c = normalized.len();
// let d = token.text.len();

let rc = x_token(
p_ctx,
0,
Expand All @@ -101,36 +92,10 @@ fn lindera_fts5_tokenize_internal(
Ok(())
}

// fn is_diacritic(x: char) -> bool {
// '\u{0300}' <= x && x <= '\u{036f}'
// }

// fn normalize_into(segment: &str, buf: &mut String) {
// buf.clear();

// for x in segment.nfd() {
// if is_diacritic(x) {
// continue;
// }
// if x.is_ascii() {
// buf.push(x.to_ascii_lowercase());
// } else {
// buf.extend(x.to_lowercase());
// }
// }
// }

#[cfg(test)]
mod tests {
use super::*;

// #[test]
// fn it_normalizes_segment() {
// let mut buf = String::new();
// normalize_into("DïācRîtįcs", &mut buf);
// assert_eq!(buf, "diacritics");
// }

extern "C" fn token_callback(
ctx: *mut c_void,
flags: c_int,
Expand Down Expand Up @@ -172,11 +137,11 @@ mod tests {
assert_eq!(
tokens,
[
("lindera", 0, 21),
("Lindera", 0, 21),
("形態素", 24, 33),
("解析", 33, 39),
("エンシ\u{3099}", 39, 54),
("ユーサ\u{3099}", 63, 75),
("エンジン", 39, 54),
("ユーザ", 63, 75),
("辞書", 75, 81),
("利用", 84, 90),
("可能", 90, 96)
Expand Down

0 comments on commit bd35c3e

Please sign in to comment.