mirror of
https://github.com/arabianq/lyrics_fetcher.git
synced 2026-04-28 08:01:22 +00:00
initial commit
This commit is contained in:
+221
@@ -0,0 +1,221 @@
|
||||
mod sources;
|
||||
|
||||
use sources::*;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_walkdir::WalkDir;
|
||||
use itertools::Itertools;
|
||||
|
||||
use futures::StreamExt;
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
process::exit,
|
||||
sync::Arc,
|
||||
};
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
use id3::{Tag, TagLike, Version, frame::Lyrics};
|
||||
|
||||
#[derive(Parser)]
|
||||
struct Args {
|
||||
#[arg(required = true)]
|
||||
music_dir_path: PathBuf,
|
||||
|
||||
#[arg(short, long, default_value_t = 1)]
|
||||
threads: usize,
|
||||
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
overwrite_existing: bool,
|
||||
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
allow_inaccurate: bool,
|
||||
|
||||
#[arg(short, long, default_value = "lrclib", value_delimiter = ',')]
|
||||
sources: Vec<String>,
|
||||
|
||||
#[arg(
|
||||
short,
|
||||
long,
|
||||
default_value = "flac,mp3,ogg,opus,aac,wav,m4a,alac,aiff,ape",
|
||||
value_delimiter = ','
|
||||
)]
|
||||
extensions: Vec<String>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
let music_dir_path = args.music_dir_path;
|
||||
let threads = args.threads;
|
||||
let overwrite = args.overwrite_existing;
|
||||
let allow_inaccurate = args.allow_inaccurate;
|
||||
let lyrics_sources_raw = args.sources;
|
||||
let extensions: Vec<String> = args.extensions.iter().map(|s| s.to_lowercase()).collect();
|
||||
|
||||
if !music_dir_path.exists() {
|
||||
eprintln!("[ERROR] The specified music directory does not exist.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if threads < 1 {
|
||||
eprintln!("[ERROR] The number of threads must be at least 1.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if lyrics_sources_raw.is_empty() {
|
||||
eprintln!("[ERROR] At least one source must be specified.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if extensions.is_empty() {
|
||||
eprintln!("[ERROR] At least one file extension must be specified.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
let lyrics_sources_raw: Vec<String> = lyrics_sources_raw
|
||||
.iter()
|
||||
.unique()
|
||||
.map(|s| s.to_lowercase())
|
||||
.collect();
|
||||
|
||||
println!("[INFO] Music directory: {}", music_dir_path.display());
|
||||
println!("[INFO] Number of threads: {}", threads);
|
||||
println!("[INFO] Sources: {}", lyrics_sources_raw.join(", "));
|
||||
|
||||
let mut sources: Vec<Arc<dyn LyricsSource>> = vec![];
|
||||
for source_name in lyrics_sources_raw {
|
||||
let source_name = source_name.trim().to_lowercase();
|
||||
|
||||
match create_source(&source_name).await {
|
||||
Ok(source) => sources.push(source),
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"[WARNING] Failed to initialize source '{}', skipping: {}",
|
||||
source_name, e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if sources.is_empty() {
|
||||
eprintln!("[ERROR] No valid sources were initialized. Exiting.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
let sources = Arc::new(sources);
|
||||
let semaphone = Arc::new(Semaphore::new(threads));
|
||||
let mut entries = WalkDir::new(music_dir_path);
|
||||
let mut tasks = Vec::new();
|
||||
|
||||
while let Some(entry) = entries.next().await {
|
||||
match entry {
|
||||
Ok(entry) => {
|
||||
if entry.file_type().await?.is_file()
|
||||
&& extensions.contains(
|
||||
&entry
|
||||
.path()
|
||||
.extension()
|
||||
.unwrap_or_default()
|
||||
.to_string_lossy()
|
||||
.to_lowercase(),
|
||||
)
|
||||
{
|
||||
let file_path = entry.path().to_path_buf();
|
||||
let semaphone_clone = Arc::clone(&semaphone);
|
||||
let sources_clone = Arc::clone(&sources);
|
||||
let task = tokio::spawn(async move {
|
||||
let _permit = semaphone_clone.acquire().await.unwrap();
|
||||
process_file(&file_path, sources_clone, overwrite, allow_inaccurate).await;
|
||||
});
|
||||
tasks.push(task);
|
||||
}
|
||||
}
|
||||
Err(e) => eprintln!("[ERROR] Failed to read directory entry: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
for task in tasks {
|
||||
let _ = task.await;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_file(
|
||||
file_path: &Path,
|
||||
sources: Arc<Vec<Arc<dyn LyricsSource>>>,
|
||||
overwrite: bool,
|
||||
allow_inaccurate: bool,
|
||||
) {
|
||||
let tag = Tag::async_read_from_path(&file_path).await;
|
||||
|
||||
match tag {
|
||||
Ok(tag) => {
|
||||
let lyrics: Vec<&Lyrics> = tag.lyrics().collect();
|
||||
if !lyrics.is_empty() && !overwrite {
|
||||
println!(
|
||||
"[INFO] File '{}' already has lyrics, skipping (use --overwrite to force)",
|
||||
file_path.display()
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
for source in sources.iter() {
|
||||
match source.fetch_lyrics(&tag, allow_inaccurate).await {
|
||||
Ok(lyrics) => {
|
||||
let lyrics = lyrics.trim();
|
||||
if lyrics.is_empty() {
|
||||
println!(
|
||||
"[INFO] Source '{}' did not return any lyrics for file '{}'",
|
||||
source.name(),
|
||||
file_path.display()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
println!(
|
||||
"[INFO] Successfully fetched lyrics for file '{}' from source '{}'",
|
||||
file_path.display(),
|
||||
source.name()
|
||||
);
|
||||
|
||||
let mut tag = tag.clone();
|
||||
tag.remove_all_lyrics();
|
||||
tag.remove_all_synchronised_lyrics();
|
||||
|
||||
tag.add_frame(Lyrics {
|
||||
lang: "XXX".to_string(),
|
||||
description: format!("Fetched from {}", source.name()),
|
||||
text: lyrics.to_string(),
|
||||
});
|
||||
|
||||
tag.write_to_path(&file_path, Version::Id3v24)
|
||||
.map_err(|e| {
|
||||
eprintln!(
|
||||
"[ERROR] Failed to write tags into {}: {}",
|
||||
file_path.display(),
|
||||
e
|
||||
);
|
||||
})
|
||||
.ok();
|
||||
break;
|
||||
}
|
||||
Err(e) => eprintln!(
|
||||
"[ERROR] Failed to fetch lyrics for file '{}' from source '{}': {}",
|
||||
file_path.display(),
|
||||
source.name(),
|
||||
e
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => eprintln!(
|
||||
"[ERROR] Failed to read ID3 tag for file '{}': {}",
|
||||
file_path.display(),
|
||||
e
|
||||
),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
use crate::sources::LyricsSource;
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use async_trait::async_trait;
|
||||
use id3::{Tag, TagLike};
|
||||
use reqwest::Client;
|
||||
use scraper::{ElementRef, Html, Node, Selector};
|
||||
use serde::Deserialize;
|
||||
|
||||
pub struct GeniusSource {
|
||||
pub api_key: String,
|
||||
client: Client,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GeniusSearchResponse {
|
||||
response: GeniusSearchData,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GeniusSearchData {
|
||||
hits: Vec<GeniusHit>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GeniusHit {
|
||||
#[serde(rename = "type")]
|
||||
hit_type: String,
|
||||
result: GeniusHitResult,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GeniusHitResult {
|
||||
title: String,
|
||||
primary_artist: GeniusArtist,
|
||||
url: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GeniusArtist {
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl GeniusSource {
|
||||
pub async fn new() -> Result<Self> {
|
||||
let api_key = std::env::var("GENIUS_API_KEY")
|
||||
.context("GENIUS_API_KEY environment variable is required for Genius source")?;
|
||||
|
||||
let client = Client::builder()
|
||||
.user_agent(format!(
|
||||
"{name}/{version} (+{repo_url})",
|
||||
name = env!("CARGO_PKG_NAME"),
|
||||
version = env!("CARGO_PKG_VERSION"),
|
||||
repo_url = env!("CARGO_PKG_REPOSITORY")
|
||||
))
|
||||
.build()
|
||||
.context("Failed to build HTTP client for Genius")?;
|
||||
|
||||
Ok(Self { api_key, client })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LyricsSource for GeniusSource {
|
||||
fn name(&self) -> &'static str {
|
||||
"Genius"
|
||||
}
|
||||
|
||||
async fn fetch_lyrics(&self, tag: &Tag, allow_inaccurate: bool) -> Result<String> {
|
||||
let title = tag
|
||||
.title()
|
||||
.ok_or_else(|| anyhow!("Missing track title in ID3 tag"))?;
|
||||
let artist = tag
|
||||
.artist()
|
||||
.ok_or_else(|| anyhow!("Missing artist name in ID3 tag"))?;
|
||||
|
||||
let query = format!("{} {}", artist, title);
|
||||
|
||||
let search_res = self
|
||||
.client
|
||||
.get("https://api.genius.com/search")
|
||||
.query(&[("q", &query)])
|
||||
.bearer_auth(&self.api_key)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !search_res.status().is_success() {
|
||||
return Err(anyhow!(
|
||||
"Genius API search failed with status {}: {}",
|
||||
search_res.status(),
|
||||
search_res.text().await.unwrap_or_default()
|
||||
));
|
||||
}
|
||||
|
||||
let data: GeniusSearchResponse = search_res.json().await?;
|
||||
|
||||
let target_title = title.to_lowercase();
|
||||
let target_artist = artist.to_lowercase();
|
||||
|
||||
let best_match = data.response.hits.iter().find(|hit| {
|
||||
if hit.hit_type != "song" {
|
||||
return false;
|
||||
}
|
||||
let hit_title = hit.result.title.to_lowercase();
|
||||
let hit_artist = hit.result.primary_artist.name.to_lowercase();
|
||||
|
||||
(hit_title.contains(&target_title) || target_title.contains(&hit_title))
|
||||
&& (hit_artist.contains(&target_artist) || target_artist.contains(&hit_artist))
|
||||
});
|
||||
|
||||
let selected = if let Some(hit) = best_match {
|
||||
hit
|
||||
} else if allow_inaccurate {
|
||||
data.response
|
||||
.hits
|
||||
.first()
|
||||
.ok_or_else(|| anyhow!("No results found on Genius"))?
|
||||
} else {
|
||||
return Err(anyhow!("No accurate match found on Genius"));
|
||||
};
|
||||
|
||||
let lyrics_url = &selected.result.url;
|
||||
|
||||
let page_html = self.client.get(lyrics_url).send().await?.text().await?;
|
||||
|
||||
let document = Html::parse_document(&page_html);
|
||||
let selector = Selector::parse(r#"div[data-lyrics-container="true"]"#)
|
||||
.map_err(|_| anyhow!("Failed to create CSS selector for lyrics extraction"))?;
|
||||
|
||||
let mut lyrics = String::new();
|
||||
|
||||
fn extract_text(node: ElementRef, out: &mut String) {
|
||||
for child in node.children() {
|
||||
match child.value() {
|
||||
Node::Text(text) => {
|
||||
out.push_str(text);
|
||||
}
|
||||
Node::Element(element) => {
|
||||
if let Some(child_elem) = ElementRef::wrap(child) {
|
||||
if child_elem.attr("data-exclude-from-selection").is_some() {
|
||||
continue;
|
||||
}
|
||||
if element.name() == "br" {
|
||||
out.push('\n');
|
||||
} else if element.name() == "script" || element.name() == "style" {
|
||||
continue;
|
||||
} else {
|
||||
extract_text(child_elem, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for container in document.select(&selector) {
|
||||
extract_text(container, &mut lyrics);
|
||||
lyrics.push_str("\n\n");
|
||||
}
|
||||
|
||||
let mut lyrics = lyrics.trim().to_string();
|
||||
|
||||
if let Some(idx) = lyrics.find("Lyrics") {
|
||||
if idx < 150 {
|
||||
let prefix = &lyrics[..idx];
|
||||
let suffix = &lyrics[idx + "Lyrics".len()..];
|
||||
if prefix.contains("Contributor")
|
||||
|| suffix.starts_with('[')
|
||||
|| prefix.to_lowercase().contains(&title.to_lowercase())
|
||||
{
|
||||
lyrics = suffix.trim_start().to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if lyrics.ends_with("Embed") {
|
||||
let mut idx = lyrics.len() - "Embed".len();
|
||||
while idx > 0 && lyrics.as_bytes()[idx - 1].is_ascii_digit() {
|
||||
idx -= 1;
|
||||
}
|
||||
lyrics = lyrics[..idx].trim_end().to_string();
|
||||
}
|
||||
|
||||
if lyrics.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"Could not extract lyrics text from the Genius page"
|
||||
));
|
||||
}
|
||||
|
||||
Ok(lyrics)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,162 @@
|
||||
use crate::sources::LyricsSource;
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use async_trait::async_trait;
|
||||
use id3::{Tag, TagLike};
|
||||
use reqwest::{Client, StatusCode};
|
||||
use serde::Deserialize;
|
||||
|
||||
pub struct LrcLibSource {
|
||||
client: Client,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct LrcLibGetResponse {
|
||||
_id: i64,
|
||||
track_name: String,
|
||||
artist_name: String,
|
||||
album_name: String,
|
||||
duration: Option<f64>,
|
||||
_instrumental: bool,
|
||||
plain_lyrics: Option<String>,
|
||||
synced_lyrics: Option<String>,
|
||||
}
|
||||
|
||||
impl LrcLibGetResponse {
|
||||
fn extract_lyrics(&self) -> Option<String> {
|
||||
if let Some(synced) = &self.synced_lyrics {
|
||||
if !synced.trim().is_empty() {
|
||||
return Some(synced.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(plain) = &self.plain_lyrics {
|
||||
if !plain.trim().is_empty() {
|
||||
return Some(plain.clone());
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl LrcLibSource {
|
||||
pub async fn new() -> Result<Self> {
|
||||
let client = Client::builder()
|
||||
.user_agent(format!(
|
||||
"{name}/{version} (+{repo_url})",
|
||||
name = env!("CARGO_PKG_NAME"),
|
||||
version = env!("CARGO_PKG_VERSION"),
|
||||
repo_url = env!("CARGO_PKG_REPOSITORY")
|
||||
))
|
||||
.build()
|
||||
.context("Failed to build HTTP client for LrcLib")?;
|
||||
|
||||
Ok(Self { client })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LyricsSource for LrcLibSource {
|
||||
fn name(&self) -> &'static str {
|
||||
"LrcLib"
|
||||
}
|
||||
|
||||
async fn fetch_lyrics(&self, tag: &Tag, allow_inaccurate: bool) -> Result<String> {
|
||||
let base_url = "https://lrclib.net/api";
|
||||
|
||||
let track_name = tag
|
||||
.title()
|
||||
.ok_or_else(|| anyhow!("Missing track title in ID3 tag"))?;
|
||||
let artist_name = tag
|
||||
.artist()
|
||||
.ok_or_else(|| anyhow!("Missing artist name in ID3 tag"))?;
|
||||
let album_name = tag.album().unwrap_or_default();
|
||||
let target_duration = tag.duration().unwrap_or_default() / 1000;
|
||||
|
||||
let mut get_req = self
|
||||
.client
|
||||
.get(format!("{base_url}/get"))
|
||||
.query(&[("track_name", track_name), ("artist_name", artist_name)]);
|
||||
|
||||
if !album_name.is_empty() {
|
||||
get_req = get_req.query(&[("album_name", album_name)]);
|
||||
}
|
||||
|
||||
if target_duration > 0 {
|
||||
get_req = get_req.query(&[("duration", target_duration.to_string())]);
|
||||
}
|
||||
|
||||
let response = get_req.send().await?;
|
||||
|
||||
if response.status() == StatusCode::OK {
|
||||
let data: LrcLibGetResponse = response.json().await?;
|
||||
if let Some(lyrics) = data.extract_lyrics() {
|
||||
return Ok(lyrics);
|
||||
}
|
||||
}
|
||||
|
||||
let mut search_req = self
|
||||
.client
|
||||
.get(format!("{base_url}/search"))
|
||||
.query(&[("track_name", track_name), ("artist_name", artist_name)]);
|
||||
|
||||
if !album_name.is_empty() {
|
||||
search_req = search_req.query(&[("album_name", album_name)]);
|
||||
}
|
||||
|
||||
let response = search_req.send().await?;
|
||||
|
||||
if response.status() == StatusCode::OK {
|
||||
let data: Vec<LrcLibGetResponse> = response.json().await?;
|
||||
let track_name_lower = track_name.to_lowercase();
|
||||
let artist_name_lower = artist_name.to_lowercase();
|
||||
let album_name_lower = album_name.to_lowercase();
|
||||
|
||||
let best_match = data
|
||||
.iter()
|
||||
.find(|item| {
|
||||
item.track_name.to_lowercase() == track_name_lower
|
||||
&& item.artist_name.to_lowercase() == artist_name_lower
|
||||
&& item.album_name.to_lowercase() == album_name_lower
|
||||
&& item
|
||||
.duration
|
||||
.map_or(false, |d| (d.round() as u32).abs_diff(target_duration) <= 2)
|
||||
})
|
||||
.or_else(|| {
|
||||
data.iter().find(|item| {
|
||||
item.track_name.to_lowercase() == track_name_lower
|
||||
&& item.artist_name.to_lowercase() == artist_name_lower
|
||||
&& item.album_name.to_lowercase() == album_name_lower
|
||||
})
|
||||
})
|
||||
.or_else(|| {
|
||||
data.iter().find(|item| {
|
||||
item.track_name.to_lowercase() == track_name_lower
|
||||
&& item.artist_name.to_lowercase() == artist_name_lower
|
||||
})
|
||||
});
|
||||
|
||||
if let Some(best) = best_match {
|
||||
if let Some(lyrics) = best.extract_lyrics() {
|
||||
return Ok(lyrics);
|
||||
}
|
||||
} else if allow_inaccurate {
|
||||
if let Some(first) = data.first() {
|
||||
if let Some(lyrics) = first.extract_lyrics() {
|
||||
return Ok(lyrics);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Err(anyhow!(
|
||||
"LrcLib search API request failed with status {}: {}",
|
||||
response.status(),
|
||||
response.text().await.unwrap_or_default()
|
||||
));
|
||||
}
|
||||
|
||||
Ok("".to_string())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
pub mod genius;
|
||||
pub mod lrclib;
|
||||
|
||||
pub use genius::GeniusSource;
|
||||
pub use lrclib::LrcLibSource;
|
||||
|
||||
use anyhow::{Result, anyhow};
|
||||
use async_trait::async_trait;
|
||||
use id3::Tag;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[async_trait]
|
||||
pub trait LyricsSource: Send + Sync {
|
||||
fn name(&self) -> &'static str;
|
||||
async fn fetch_lyrics(&self, tag: &Tag, allow_inaccurate: bool) -> Result<String>;
|
||||
}
|
||||
|
||||
/// Фабрика для создания источников по их строковому названию
|
||||
pub async fn create_source(name: &str) -> Result<Arc<dyn LyricsSource>> {
|
||||
match name {
|
||||
"lrclib" => Ok(Arc::new(LrcLibSource::new().await?)),
|
||||
"genius" => Ok(Arc::new(GeniusSource::new().await?)),
|
||||
_ => Err(anyhow!("Unknown source type: {}", name)),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user