initial commit

This commit is contained in:
2026-04-04 23:53:26 +03:00
commit a82d4d3819
7 changed files with 3096 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
/target
Generated
+2464
View File
File diff suppressed because it is too large Load Diff
+30
View File
@@ -0,0 +1,30 @@
# Crate manifest for the `lyrics_fetcher` command-line tool.
[package]
name = "lyrics_fetcher"
version = "0.1.0"
edition = "2024"
# NOTE(review): no `repository` key is declared in [package], yet the HTTP sources
# embed env!("CARGO_PKG_REPOSITORY") in their User-Agent string — confirm whether a
# repository URL should be added here.
[dependencies]
# Async runtime, stream helpers and recursive directory walking used by the binary.
tokio = { version = "1.51.0", features = ["full"] }
futures = "0.3.32"
async-walkdir = "2.1.0"
async-trait = "0.1.89"
# Command-line parsing, error handling and iterator helpers.
clap = { version = "4.6.0", features = ["derive"] }
anyhow = "1.0.102"
itertools = "0.14.0"
# ID3 tag reading/writing; the "tokio" feature is what provides the async read used by the binary.
id3 = { version = "1.16.4", default-features = false, features = ["tokio"] }
# HTTP client, JSON decoding and HTML scraping used by the lyrics sources.
reqwest = { version = "0.13.2", default-features = false, features = [
"rustls",
"json",
"query"
] }
serde = { version = "1.0.228", features = ["derive"] }
scraper = "0.26.0"
# Release builds favour a small binary: symbols stripped, LTO, a single codegen unit,
# size-oriented optimisation, and aborting instead of unwinding on panic.
[profile.release]
strip = true
lto = true
codegen-units = 1
opt-level = "z"
panic = "abort"
+221
View File
@@ -0,0 +1,221 @@
mod sources;
use sources::*;
use anyhow::Result;
use async_walkdir::WalkDir;
use itertools::Itertools;
use futures::StreamExt;
use std::{
path::{Path, PathBuf},
process::exit,
sync::Arc,
};
use tokio::sync::Semaphore;
use clap::Parser;
use id3::{Tag, TagLike, Version, frame::Lyrics};
/// Fetch song lyrics from online sources and embed them into the ID3 tags of audio files
///
/// The doc comments on this struct and its fields are picked up by clap and shown
/// in the generated `--help` output.
#[derive(Parser)]
struct Args {
    /// Directory to scan for audio files
    #[arg(required = true)]
    music_dir_path: PathBuf,

    /// Maximum number of files processed concurrently (must be at least 1)
    #[arg(short, long, default_value_t = 1)]
    threads: usize,

    /// Replace lyrics that are already stored in a file instead of skipping it
    #[arg(short, long, default_value_t = false)]
    overwrite_existing: bool,

    /// Accept the first search result when no result matches the tag's title and artist
    #[arg(short, long, default_value_t = false)]
    allow_inaccurate: bool,

    /// Comma-separated lyrics sources, tried in the given order (known: lrclib, genius)
    #[arg(short, long, default_value = "lrclib", value_delimiter = ',')]
    sources: Vec<String>,

    /// Comma-separated file extensions (case-insensitive) that are considered audio files
    #[arg(
        short,
        long,
        default_value = "flac,mp3,ogg,opus,aac,wav,m4a,alac,aiff,ape",
        value_delimiter = ','
    )]
    extensions: Vec<String>,
}
/// Program entry point.
///
/// Parses the command line, validates it, initializes the requested lyrics
/// sources, walks the music directory and spawns one task per matching file.
/// At most `--threads` files are processed at the same time; the limit is
/// enforced with a semaphore that every task must acquire before doing work.
///
/// Invalid arguments (missing directory, zero threads, no usable source or
/// extension) terminate the process with exit code 1. Problems with a single
/// directory entry are logged and do not stop the scan.
#[tokio::main]
async fn main() -> Result<()> {
    let args = Args::parse();
    let music_dir_path = args.music_dir_path;
    let threads = args.threads;
    let overwrite = args.overwrite_existing;
    let allow_inaccurate = args.allow_inaccurate;

    // Normalize first and de-duplicate afterwards: otherwise "LrcLib,lrclib" or
    // "lrclib, lrclib" would initialize (and later query) the same source twice.
    let lyrics_sources_raw: Vec<String> = args
        .sources
        .iter()
        .map(|s| s.trim().to_lowercase())
        .filter(|s| !s.is_empty())
        .unique()
        .collect();

    // Extensions are compared against `Path::extension()`, which never contains
    // the leading dot, so tolerate user input such as ".mp3" or " flac".
    let extensions: Vec<String> = args
        .extensions
        .iter()
        .map(|s| s.trim().trim_start_matches('.').to_lowercase())
        .filter(|s| !s.is_empty())
        .collect();

    if !music_dir_path.is_dir() {
        eprintln!("[ERROR] The specified music directory does not exist or is not a directory.");
        exit(1);
    }
    // A semaphore with zero permits would make every task wait forever.
    if threads < 1 {
        eprintln!("[ERROR] The number of threads must be at least 1.");
        exit(1);
    }
    if lyrics_sources_raw.is_empty() {
        eprintln!("[ERROR] At least one source must be specified.");
        exit(1);
    }
    if extensions.is_empty() {
        eprintln!("[ERROR] At least one file extension must be specified.");
        exit(1);
    }

    println!("[INFO] Music directory: {}", music_dir_path.display());
    println!("[INFO] Number of threads: {}", threads);
    println!("[INFO] Sources: {}", lyrics_sources_raw.join(", "));

    // A source that fails to initialize is skipped with a warning; the run only
    // aborts when no source at all could be created.
    let mut sources: Vec<Arc<dyn LyricsSource>> = vec![];
    for source_name in lyrics_sources_raw {
        match create_source(&source_name).await {
            Ok(source) => sources.push(source),
            Err(e) => {
                eprintln!(
                    "[WARNING] Failed to initialize source '{}', skipping: {}",
                    source_name, e
                );
            }
        }
    }
    if sources.is_empty() {
        eprintln!("[ERROR] No valid sources were initialized. Exiting.");
        exit(1);
    }

    let sources = Arc::new(sources);
    let semaphore = Arc::new(Semaphore::new(threads));
    let mut entries = WalkDir::new(music_dir_path);
    let mut tasks = Vec::new();

    while let Some(entry) = entries.next().await {
        let entry = match entry {
            Ok(entry) => entry,
            Err(e) => {
                eprintln!("[ERROR] Failed to read directory entry: {}", e);
                continue;
            }
        };

        // One unreadable entry must not abort the whole run (and cancel the
        // tasks that were already spawned), so log and move on.
        let is_file = match entry.file_type().await {
            Ok(file_type) => file_type.is_file(),
            Err(e) => {
                eprintln!(
                    "[ERROR] Failed to determine file type of '{}': {}",
                    entry.path().display(),
                    e
                );
                continue;
            }
        };
        if !is_file {
            continue;
        }

        let file_path = entry.path().to_path_buf();
        let extension = file_path
            .extension()
            .unwrap_or_default()
            .to_string_lossy()
            .to_lowercase();
        if !extensions.contains(&extension) {
            continue;
        }

        let semaphore_clone = Arc::clone(&semaphore);
        let sources_clone = Arc::clone(&sources);
        tasks.push(tokio::spawn(async move {
            let _permit = semaphore_clone
                .acquire()
                .await
                .expect("the semaphore is never closed");
            process_file(&file_path, sources_clone, overwrite, allow_inaccurate).await;
        }));
    }

    for task in tasks {
        if let Err(e) = task.await {
            eprintln!("[ERROR] A file processing task did not complete: {}", e);
        }
    }
    Ok(())
}
/// Fetches lyrics for a single audio file and stores them in its ID3 tag.
///
/// * `file_path` – file whose ID3 tag is read and, on success, rewritten.
/// * `sources` – lyrics sources, queried in order until one returns non-empty text.
/// * `overwrite` – when `false`, files that already contain a lyrics frame are skipped.
/// * `allow_inaccurate` – forwarded to every source's `fetch_lyrics`.
///
/// All failures are reported on stderr; nothing is returned to the caller. On
/// success every existing (plain and synchronised) lyrics frame is replaced by
/// one new frame and the tag is written back as ID3v2.4.
async fn process_file(
    file_path: &Path,
    sources: Arc<Vec<Arc<dyn LyricsSource>>>,
    overwrite: bool,
    allow_inaccurate: bool,
) {
    let mut tag = match Tag::async_read_from_path(file_path).await {
        Ok(tag) => tag,
        Err(e) => {
            eprintln!(
                "[ERROR] Failed to read ID3 tag for file '{}': {}",
                file_path.display(),
                e
            );
            return;
        }
    };

    if !overwrite && tag.lyrics().next().is_some() {
        // The flag is derived from the `overwrite_existing` field, so its long
        // name is `--overwrite-existing`.
        println!(
            "[INFO] File '{}' already has lyrics, skipping (use --overwrite-existing to force)",
            file_path.display()
        );
        return;
    }

    for source in sources.iter() {
        let fetched = match source.fetch_lyrics(&tag, allow_inaccurate).await {
            Ok(text) => text,
            Err(e) => {
                eprintln!(
                    "[ERROR] Failed to fetch lyrics for file '{}' from source '{}': {}",
                    file_path.display(),
                    source.name(),
                    e
                );
                continue;
            }
        };

        // Sources signal "nothing found" with an empty (or blank) string.
        let lyrics = fetched.trim();
        if lyrics.is_empty() {
            println!(
                "[INFO] Source '{}' did not return any lyrics for file '{}'",
                source.name(),
                file_path.display()
            );
            continue;
        }

        println!(
            "[INFO] Successfully fetched lyrics for file '{}' from source '{}'",
            file_path.display(),
            source.name()
        );

        // The tag is owned by this function and not needed in its old form
        // afterwards, so it is modified in place rather than cloned.
        tag.remove_all_lyrics();
        tag.remove_all_synchronised_lyrics();
        tag.add_frame(Lyrics {
            lang: "XXX".to_string(),
            description: format!("Fetched from {}", source.name()),
            text: lyrics.to_string(),
        });

        // NOTE(review): this is a synchronous write inside an async task; it
        // blocks the worker thread for the duration of the file I/O.
        if let Err(e) = tag.write_to_path(file_path, Version::Id3v24) {
            eprintln!(
                "[ERROR] Failed to write tags into {}: {}",
                file_path.display(),
                e
            );
        }
        // The first source that yields lyrics wins, even if writing failed.
        break;
    }
}
+193
View File
@@ -0,0 +1,193 @@
use crate::sources::LyricsSource;
use anyhow::{Context, Result, anyhow};
use async_trait::async_trait;
use id3::{Tag, TagLike};
use reqwest::Client;
use scraper::{ElementRef, Html, Node, Selector};
use serde::Deserialize;
/// Lyrics source that searches the Genius API and scrapes the matching song page.
pub struct GeniusSource {
    /// Token sent as the bearer credential with every search request.
    pub api_key: String,
    /// HTTP client shared by the search and page requests.
    client: Client,
}
/// Top-level JSON envelope returned by the Genius search endpoint.
#[derive(Deserialize)]
struct GeniusSearchResponse {
    /// Payload of the search call.
    response: GeniusSearchData,
}
/// Body of a search response: the list of hits in the order Genius returned them.
#[derive(Deserialize)]
struct GeniusSearchData {
    /// Search hits; the first entry is used as the fallback for inaccurate matching.
    hits: Vec<GeniusHit>,
}
/// A single entry of the search result list.
#[derive(Deserialize)]
struct GeniusHit {
    /// Value of the JSON `type` key; only hits of type `"song"` are considered accurate matches.
    #[serde(rename = "type")]
    hit_type: String,
    /// Details of the item this hit refers to.
    result: GeniusHitResult,
}
/// The fields of a hit that are needed to match it against a tag and to load its page.
#[derive(Deserialize)]
struct GeniusHitResult {
    /// Title as reported by Genius.
    title: String,
    /// Main artist of the item.
    primary_artist: GeniusArtist,
    /// Address of the page the lyrics are scraped from.
    url: String,
}
/// Artist record embedded in a search hit.
#[derive(Deserialize)]
struct GeniusArtist {
    /// Artist name as reported by Genius.
    name: String,
}
impl GeniusSource {
    /// Creates a Genius source from the process environment.
    ///
    /// # Errors
    ///
    /// Fails when the `GENIUS_API_KEY` environment variable cannot be read or
    /// when the HTTP client cannot be built.
    pub async fn new() -> Result<Self> {
        // "<crate name>/<crate version> (+<repository URL>)", assembled at compile
        // time from the package metadata Cargo exposes to the build.
        const USER_AGENT: &str = concat!(
            env!("CARGO_PKG_NAME"),
            "/",
            env!("CARGO_PKG_VERSION"),
            " (+",
            env!("CARGO_PKG_REPOSITORY"),
            ")"
        );

        let api_key = std::env::var("GENIUS_API_KEY")
            .context("GENIUS_API_KEY environment variable is required for Genius source")?;

        let builder = Client::builder().user_agent(USER_AGENT);
        let client = builder
            .build()
            .context("Failed to build HTTP client for Genius")?;

        Ok(Self { api_key, client })
    }
}
#[async_trait]
impl LyricsSource for GeniusSource {
fn name(&self) -> &'static str {
"Genius"
}
async fn fetch_lyrics(&self, tag: &Tag, allow_inaccurate: bool) -> Result<String> {
let title = tag
.title()
.ok_or_else(|| anyhow!("Missing track title in ID3 tag"))?;
let artist = tag
.artist()
.ok_or_else(|| anyhow!("Missing artist name in ID3 tag"))?;
let query = format!("{} {}", artist, title);
let search_res = self
.client
.get("https://api.genius.com/search")
.query(&[("q", &query)])
.bearer_auth(&self.api_key)
.send()
.await?;
if !search_res.status().is_success() {
return Err(anyhow!(
"Genius API search failed with status {}: {}",
search_res.status(),
search_res.text().await.unwrap_or_default()
));
}
let data: GeniusSearchResponse = search_res.json().await?;
let target_title = title.to_lowercase();
let target_artist = artist.to_lowercase();
let best_match = data.response.hits.iter().find(|hit| {
if hit.hit_type != "song" {
return false;
}
let hit_title = hit.result.title.to_lowercase();
let hit_artist = hit.result.primary_artist.name.to_lowercase();
(hit_title.contains(&target_title) || target_title.contains(&hit_title))
&& (hit_artist.contains(&target_artist) || target_artist.contains(&hit_artist))
});
let selected = if let Some(hit) = best_match {
hit
} else if allow_inaccurate {
data.response
.hits
.first()
.ok_or_else(|| anyhow!("No results found on Genius"))?
} else {
return Err(anyhow!("No accurate match found on Genius"));
};
let lyrics_url = &selected.result.url;
let page_html = self.client.get(lyrics_url).send().await?.text().await?;
let document = Html::parse_document(&page_html);
let selector = Selector::parse(r#"div[data-lyrics-container="true"]"#)
.map_err(|_| anyhow!("Failed to create CSS selector for lyrics extraction"))?;
let mut lyrics = String::new();
fn extract_text(node: ElementRef, out: &mut String) {
for child in node.children() {
match child.value() {
Node::Text(text) => {
out.push_str(text);
}
Node::Element(element) => {
if let Some(child_elem) = ElementRef::wrap(child) {
if child_elem.attr("data-exclude-from-selection").is_some() {
continue;
}
if element.name() == "br" {
out.push('\n');
} else if element.name() == "script" || element.name() == "style" {
continue;
} else {
extract_text(child_elem, out);
}
}
}
_ => {}
}
}
}
for container in document.select(&selector) {
extract_text(container, &mut lyrics);
lyrics.push_str("\n\n");
}
let mut lyrics = lyrics.trim().to_string();
if let Some(idx) = lyrics.find("Lyrics") {
if idx < 150 {
let prefix = &lyrics[..idx];
let suffix = &lyrics[idx + "Lyrics".len()..];
if prefix.contains("Contributor")
|| suffix.starts_with('[')
|| prefix.to_lowercase().contains(&title.to_lowercase())
{
lyrics = suffix.trim_start().to_string();
}
}
}
if lyrics.ends_with("Embed") {
let mut idx = lyrics.len() - "Embed".len();
while idx > 0 && lyrics.as_bytes()[idx - 1].is_ascii_digit() {
idx -= 1;
}
lyrics = lyrics[..idx].trim_end().to_string();
}
if lyrics.is_empty() {
return Err(anyhow!(
"Could not extract lyrics text from the Genius page"
));
}
Ok(lyrics)
}
}
+162
View File
@@ -0,0 +1,162 @@
use crate::sources::LyricsSource;
use anyhow::{Context, Result, anyhow};
use async_trait::async_trait;
use id3::{Tag, TagLike};
use reqwest::{Client, StatusCode};
use serde::Deserialize;
/// Lyrics source that queries the LrcLib HTTP API.
pub struct LrcLibSource {
    /// HTTP client used for every request to the API.
    client: Client,
}
/// One track record as returned by the LrcLib `/get` and `/search` endpoints.
///
/// JSON keys are mapped from camelCase to the snake_case field names below.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct LrcLibGetResponse {
    /// Record identifier; not read anywhere in this module.
    _id: i64,
    /// Track title, compared case-insensitively against the tag.
    track_name: String,
    /// Artist name, compared case-insensitively against the tag.
    artist_name: String,
    /// Album name, compared case-insensitively against the tag.
    album_name: String,
    /// Track length; compared against the tag duration with a tolerance of 2.
    duration: Option<f64>,
    /// Instrumental flag; not read anywhere in this module.
    _instrumental: bool,
    /// Unsynchronised lyrics text, if any.
    plain_lyrics: Option<String>,
    /// Synchronised lyrics text, if any; preferred over `plain_lyrics`.
    synced_lyrics: Option<String>,
}
impl LrcLibGetResponse {
    /// Returns the lyrics carried by this record, preferring synchronised over plain text.
    ///
    /// A variant that is missing or consists only of whitespace is ignored. The
    /// chosen text is returned unmodified (it is not trimmed). `None` means the
    /// record has no usable lyrics.
    fn extract_lyrics(&self) -> Option<String> {
        // Candidates in order of preference.
        let candidates = [&self.synced_lyrics, &self.plain_lyrics];
        candidates
            .iter()
            .filter_map(|slot| slot.as_deref())
            .find(|text| !text.trim().is_empty())
            .map(String::from)
    }
}
impl LrcLibSource {
    /// Creates an LrcLib source with its own HTTP client.
    ///
    /// The client identifies itself as `<crate name>/<crate version> (+<repository URL>)`,
    /// taken from the package metadata available at compile time.
    ///
    /// # Errors
    ///
    /// Fails when the HTTP client cannot be built.
    pub async fn new() -> Result<Self> {
        let user_agent: &'static str = concat!(
            env!("CARGO_PKG_NAME"),
            "/",
            env!("CARGO_PKG_VERSION"),
            " (+",
            env!("CARGO_PKG_REPOSITORY"),
            ")"
        );

        Client::builder()
            .user_agent(user_agent)
            .build()
            .context("Failed to build HTTP client for LrcLib")
            .map(|client| Self { client })
    }
}
#[async_trait]
impl LyricsSource for LrcLibSource {
fn name(&self) -> &'static str {
"LrcLib"
}
async fn fetch_lyrics(&self, tag: &Tag, allow_inaccurate: bool) -> Result<String> {
let base_url = "https://lrclib.net/api";
let track_name = tag
.title()
.ok_or_else(|| anyhow!("Missing track title in ID3 tag"))?;
let artist_name = tag
.artist()
.ok_or_else(|| anyhow!("Missing artist name in ID3 tag"))?;
let album_name = tag.album().unwrap_or_default();
let target_duration = tag.duration().unwrap_or_default() / 1000;
let mut get_req = self
.client
.get(format!("{base_url}/get"))
.query(&[("track_name", track_name), ("artist_name", artist_name)]);
if !album_name.is_empty() {
get_req = get_req.query(&[("album_name", album_name)]);
}
if target_duration > 0 {
get_req = get_req.query(&[("duration", target_duration.to_string())]);
}
let response = get_req.send().await?;
if response.status() == StatusCode::OK {
let data: LrcLibGetResponse = response.json().await?;
if let Some(lyrics) = data.extract_lyrics() {
return Ok(lyrics);
}
}
let mut search_req = self
.client
.get(format!("{base_url}/search"))
.query(&[("track_name", track_name), ("artist_name", artist_name)]);
if !album_name.is_empty() {
search_req = search_req.query(&[("album_name", album_name)]);
}
let response = search_req.send().await?;
if response.status() == StatusCode::OK {
let data: Vec<LrcLibGetResponse> = response.json().await?;
let track_name_lower = track_name.to_lowercase();
let artist_name_lower = artist_name.to_lowercase();
let album_name_lower = album_name.to_lowercase();
let best_match = data
.iter()
.find(|item| {
item.track_name.to_lowercase() == track_name_lower
&& item.artist_name.to_lowercase() == artist_name_lower
&& item.album_name.to_lowercase() == album_name_lower
&& item
.duration
.map_or(false, |d| (d.round() as u32).abs_diff(target_duration) <= 2)
})
.or_else(|| {
data.iter().find(|item| {
item.track_name.to_lowercase() == track_name_lower
&& item.artist_name.to_lowercase() == artist_name_lower
&& item.album_name.to_lowercase() == album_name_lower
})
})
.or_else(|| {
data.iter().find(|item| {
item.track_name.to_lowercase() == track_name_lower
&& item.artist_name.to_lowercase() == artist_name_lower
})
});
if let Some(best) = best_match {
if let Some(lyrics) = best.extract_lyrics() {
return Ok(lyrics);
}
} else if allow_inaccurate {
if let Some(first) = data.first() {
if let Some(lyrics) = first.extract_lyrics() {
return Ok(lyrics);
}
}
}
} else {
return Err(anyhow!(
"LrcLib search API request failed with status {}: {}",
response.status(),
response.text().await.unwrap_or_default()
));
}
Ok("".to_string())
}
}
+25
View File
@@ -0,0 +1,25 @@
pub mod genius;
pub mod lrclib;
pub use genius::GeniusSource;
pub use lrclib::LrcLibSource;
use anyhow::{Result, anyhow};
use async_trait::async_trait;
use id3::Tag;
use std::sync::Arc;
/// A provider that can look up lyrics for a track described by an ID3 tag.
///
/// Implementations must be `Send + Sync` because they are shared between
/// concurrently running tasks.
#[async_trait]
pub trait LyricsSource: Send + Sync {
    /// Display name of the source, used in log output.
    fn name(&self) -> &'static str;

    /// Fetches the lyrics for the track described by `tag`.
    ///
    /// `allow_inaccurate` permits the source to fall back to a result that does
    /// not match the tag's metadata. Callers treat a blank string as
    /// "no lyrics found".
    async fn fetch_lyrics(&self, tag: &Tag, allow_inaccurate: bool) -> Result<String>;
}
/// Factory that creates a lyrics source from its textual name.
///
/// Recognised names are `"lrclib"` and `"genius"`; the comparison is exact, so
/// callers are expected to pass the name already lower-cased and trimmed.
///
/// # Errors
///
/// Fails when the name is unknown or when the selected source cannot be constructed.
pub async fn create_source(name: &str) -> Result<Arc<dyn LyricsSource>> {
    let source: Arc<dyn LyricsSource> = match name {
        "lrclib" => Arc::new(LrcLibSource::new().await?),
        "genius" => Arc::new(GeniusSource::new().await?),
        _ => return Err(anyhow!("Unknown source type: {}", name)),
    };
    Ok(source)
}