// File: Sakayaki/Services/SyncService.cs
// Repository viewer residue (navigation: Newer / Older; "@fabre fabre on 20 Jan", 5 KB, annotate view).
using System.Globalization;
using Microsoft.EntityFrameworkCore;
using Sakayaki.Models;

namespace Sakayaki.Services;

/// <summary>
/// Synchronizes dated Fanbox folders found on disk into the <c>FanboxFolders</c> table.
/// </summary>
public sealed class SyncService(AppDbContext dbContext)
{
    /// <summary>Date format expected at the start of every syncable folder name.</summary>
    private const string DatePrefixFormat = "yyyy-MM-dd";

    /// <summary>Number of characters occupied by <see cref="DatePrefixFormat"/>.</summary>
    private const int DatePrefixLength = 10;

    /// <summary>
    /// Characters that split a title into keyword candidates. Full-width forms are written
    /// as Unicode escapes so that they cannot be silently collapsed into their ASCII
    /// look-alikes by editors or tooling.
    /// </summary>
    private static readonly char[] TitleSeparators =
    {
        ' ',
        '\u3010', // left black lenticular bracket
        '\u3011', // right black lenticular bracket
        '(',
        ')',
        '\uFF08', // full-width left parenthesis
        '\uFF09', // full-width right parenthesis
        '/',
        '\uFF0F', // full-width solidus
        ',',
        '\uFF0C', // full-width comma
        '_',
        '-',
    };

    private readonly AppDbContext _dbContext = dbContext;

    /// <summary>
    /// Scans the immediate sub-directories of <paramref name="root"/> and inserts a
    /// <c>FanboxFolder</c> row for every folder named <c>yyyy-MM-dd-Title</c> that is not
    /// yet stored for <paramref name="author"/>.
    /// </summary>
    /// <param name="root">Directory whose direct children are scanned.</param>
    /// <param name="author">Author name stored on, and used to de-duplicate, each row.</param>
    /// <param name="cancellationToken">Token checked per directory and passed to every database call.</param>
    /// <returns>The number of folders inserted by this call; 0 when nothing new was found.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="root"/> or <paramref name="author"/> is null, empty or whitespace.
    /// </exception>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    /// <remarks>
    /// File-system errors raised by <see cref="Directory.GetDirectories(string)"/> or
    /// <see cref="Directory.GetFiles(string)"/> (for example a missing root) are not caught here.
    /// </remarks>
    public async Task<int> SyncFanboxFoldersAsync(
        string root,
        string author,
        CancellationToken cancellationToken = default)
    {
        // Validate the entry arguments before touching the disk or the database.
        if (string.IsNullOrWhiteSpace(root))
            throw new ArgumentException("Root path is required.", nameof(root));
        if (string.IsNullOrWhiteSpace(author))
            throw new ArgumentException("Author is required.", nameof(author));

        var pending = new List<FanboxFolder>();

        // Keywords already stored in the database; reused when they occur inside a new title.
        var existingKeywords = await LoadExistingKeywordsAsync(cancellationToken);

        foreach (var dir in Directory.GetDirectories(root))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var folderName = Path.GetFileName(dir);

            // Convention: only folders named "yyyy-MM-dd-<title>" take part in the sync.
            if (folderName.Length <= DatePrefixLength || folderName[DatePrefixLength] != '-')
                continue;

            // Parse the leading date strictly; skip the folder when it is not a valid date.
            if (!DateTime.TryParseExact(
                    folderName.Substring(0, DatePrefixLength),
                    DatePrefixFormat,
                    CultureInfo.InvariantCulture,
                    DateTimeStyles.None,
                    out var date))
                continue;

            // Everything after the "yyyy-MM-dd-" prefix is the title (may be empty).
            var title = folderName.Substring(DatePrefixLength + 1);

            // Skip folders already stored for the same author + date + title. This check runs
            // before the file listing and keyword extraction so that known folders cost
            // nothing beyond the query itself.
            var exists = await _dbContext.FanboxFolders.AsNoTracking().AnyAsync(
                x => x.Author == author && x.Date == date && x.Title == title,
                cancellationToken);
            if (exists)
                continue;

            // Stage the row; everything is written in a single SaveChanges call below.
            pending.Add(new FanboxFolder
            {
                FolderName = folderName,
                Author = author,
                Date = date,
                Title = title,
                Keywords = BuildKeywords(title, existingKeywords),
                // Number of files directly inside the folder (sub-directories are not descended into).
                FileCount = Directory.GetFiles(dir).Length
            });
        }

        if (pending.Count == 0)
            return 0;

        _dbContext.FanboxFolders.AddRange(pending);
        await _dbContext.SaveChangesAsync(cancellationToken);

        // SaveChangesAsync reports every state entry written by the context, which may include
        // changes unrelated to this sync. Reaching this line means the save did not throw, so
        // the staged count is the number of folders inserted.
        return pending.Count;
    }

    /// <summary>
    /// Derives the keyword list for a title: the title's own tokens plus every known
    /// keyword that occurs in the title as a substring.
    /// </summary>
    /// <param name="title">Folder title (the folder name without its date prefix).</param>
    /// <param name="existingKeywords">Keywords already known from earlier rows.</param>
    /// <returns>
    /// The distinct keywords joined with <c>","</c>, or <c>null</c> when none were found.
    /// </returns>
    private static string? BuildKeywords(string title, IReadOnlyCollection<string> existingKeywords)
    {
        // HashSet keeps the keywords unique (ordinal, case-sensitive).
        var hit = new HashSet<string>(StringComparer.Ordinal);

        // Tokenize on the separator table and drop single-character tokens as noise.
        foreach (var token in title.Split(TitleSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (token.Length >= 2)
                hit.Add(token);
        }

        // Add known keywords that appear anywhere in the raw title.
        foreach (var keyword in existingKeywords)
        {
            if (title.Contains(keyword, StringComparison.Ordinal))
                hit.Add(keyword);
        }

        return hit.Count > 0 ? string.Join(",", hit) : null;
    }

    /// <summary>
    /// Loads every non-empty <c>Keywords</c> value from the database and flattens the
    /// comma-separated lists into one de-duplicated set.
    /// </summary>
    /// <param name="cancellationToken">Token passed to the database query.</param>
    /// <returns>The distinct, trimmed keywords found across all stored rows.</returns>
    private async Task<IReadOnlyCollection<string>> LoadExistingKeywordsAsync(CancellationToken cancellationToken)
    {
        var keywordLists = await _dbContext.FanboxFolders.AsNoTracking()
            .Select(x => x.Keywords)
            .Where(x => x != null && x != string.Empty)
            .Distinct()
            .ToListAsync(cancellationToken);

        // Split each stored list and collect the individual keywords into one set.
        var keywords = new HashSet<string>(StringComparer.Ordinal);
        foreach (var list in keywordLists)
        {
            // The query already filters null/empty; this also skips whitespace-only values.
            if (string.IsNullOrWhiteSpace(list))
                continue;

            foreach (var keyword in list.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
            {
                keywords.Add(keyword);
            }
        }

        return keywords;
    }
}