File size: 5,452 Bytes
aaed37a
2158d6f
 
f034b93
 
aaed37a
 
2158d6f
20d8ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f034b93
20d8ce9
 
 
 
 
 
 
 
 
 
 
 
 
f034b93
20d8ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f034b93
 
20d8ce9
 
f034b93
20d8ce9
 
f034b93
20d8ce9
 
 
f034b93
20d8ce9
f034b93
 
20d8ce9
 
f034b93
20d8ce9
 
 
f034b93
20d8ce9
 
f034b93
20d8ce9
 
f034b93
 
 
 
 
 
20d8ce9
 
 
f034b93
 
20d8ce9
 
 
f034b93
 
20d8ce9
 
 
f034b93
 
 
 
20d8ce9
f034b93
aaed37a
 
 
 
 
 
 
f034b93
 
 
 
 
 
 
20d8ce9
 
 
f034b93
 
 
2158d6f
f034b93
 
 
 
 
 
 
 
 
8d24163
f034b93
 
8d24163
 
 
f034b93
 
8d24163
 
f034b93
 
 
aaed37a
 
8d24163
20d8ce9
 
 
 
 
 
 
 
 
 
f034b93
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st

def pitch_shift_with_formant_preservation(y, sr, n_steps):
    """Shift the pitch of ``y`` by ``n_steps`` steps (12 steps per octave).

    NOTE(review): despite the name, this is a plain
    ``librosa.effects.pitch_shift`` call; no separate formant-preservation
    step is performed here.

    Args:
        y: Audio samples to shift.
        sr: Sample rate of ``y``.
        n_steps: Number of steps to shift by; with ``bins_per_octave=12``
            one step is one twelfth of an octave.

    Returns:
        The pitch-shifted audio as returned by librosa.
    """
    # The former frame_length/hop_length locals were never passed to librosa,
    # so they were removed; the call below is unchanged.
    return librosa.effects.pitch_shift(
        y=y,
        sr=sr,
        n_steps=n_steps,
        bins_per_octave=12,
        res_type='kaiser_fast',
    )

def enhance_female_characteristics(y, sr, settings):
    """Blend the harmonic component back into ``y`` and apply the voice EQ.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        settings: Mapping that must contain ``'harmonic_boost'``, the mix
            weight given to the harmonic component (the unprocessed signal
            gets ``1 - harmonic_boost``).

    Returns:
        The output of ``apply_female_eq`` for the blended signal.
    """
    # Only the harmonic half of the separation is used; the percussive
    # half is discarded.
    harmonic_part, _ = librosa.effects.hpss(y, margin=3.0, kernel_size=31)

    # Weighted mix of the harmonic component and the untouched input.
    blended = (
        harmonic_part * settings['harmonic_boost']
        + y * (1 - settings['harmonic_boost'])
    )

    return apply_female_eq(blended, sr)

def apply_female_eq(y, sr):
    """Boost the 1-2 kHz and 3-5 kHz bands of ``y`` and peak-normalise.

    Each band is isolated with a zero-phase 2nd-order Butterworth band-pass
    and added on top of the unmodified signal. Previously the first
    band-pass *replaced* the signal, which threw away everything outside
    the two bands instead of boosting them.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        The equalised signal passed through ``librosa.util.normalize``.
    """
    nyquist = sr / 2

    # (low edge Hz, high edge Hz, gain added on top of the dry signal).
    # The 0.3 gain for the upper band comes from the original code; the 0.5
    # gain for the 1-2 kHz band is a moderate value chosen for this fix.
    boost_bands = (
        (1000, 2000, 0.5),
        (3000, 5000, 0.3),
    )

    y_eq = y
    for low_hz, high_hz, gain in boost_bands:
        # butter() rejects normalised edges >= 1, so skip bands that do not
        # fit below Nyquist at low sample rates.
        if high_hz >= nyquist:
            continue
        b, a = signal.butter(2, [low_hz / nyquist, high_hz / nyquist], btype='band')
        # Non in-place add so the caller's array is never modified.
        y_eq = y_eq + gain * signal.filtfilt(b, a, y)

    return librosa.util.normalize(y_eq)

def add_breathiness(y, sr, amount):
    """Mix low-pass-filtered Gaussian noise into ``y`` and peak-normalise.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y`` in Hz.
        amount: Mix weight of the noise; the input is scaled by
            ``1 - amount``.

    Returns:
        The mixed signal passed through ``librosa.util.normalize``.
    """
    # Gaussian noise (mean 0, std 0.005), one sample per input sample.
    white = np.random.normal(0, 0.005, len(y))

    # Zero-phase 2nd-order Butterworth low-pass at 2 kHz shapes the noise.
    lp_b, lp_a = signal.butter(2, 2000/(sr/2), btype='lowpass')
    shaped = signal.filtfilt(lp_b, lp_a, white)

    # Cross-fade between the dry signal and the shaped noise.
    mixed = y * (1 - amount) + shaped * amount
    return librosa.util.normalize(mixed)

def process_audio_advanced(audio_file, settings):
    """Run the full voice-conversion chain on an audio file.

    Steps: load at 24 kHz, remove DC offset, pitch-shift, harmonic/EQ
    enhancement, optional breathiness, light smoothing, peak normalisation.

    Args:
        audio_file: Path (or anything ``librosa.load`` accepts) of the input.
        settings: Mapping with ``'pitch_shift'``, ``'harmonic_boost'`` and
            ``'breathiness'`` entries.

    Returns:
        Tuple ``(samples, sample_rate)`` of the processed audio.

    Raises:
        ValueError: If the loaded audio contains no samples.
    """
    # Resample everything to a fixed rate so the filters below behave the same
    # for every input.
    y, sr = librosa.load(audio_file, sr=24000)
    if y.size == 0:
        raise ValueError("Audio file contains no samples")

    # Remove DC offset, then bring the peak to full scale.
    y = librosa.util.normalize(y - np.mean(y))

    y_shifted = pitch_shift_with_formant_preservation(
        y,
        sr,
        settings['pitch_shift'],
    )

    y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)

    if settings['breathiness'] > 0:
        y_enhanced = add_breathiness(y_enhanced, sr, settings['breathiness'])

    # Light smoothing only. The previous 1001-sample window (about 42 ms at
    # 24 kHz) acted as a low-pass far below the speech band and also raised on
    # clips shorter than the window. A 5-sample window only takes the edge off
    # sample-level roughness.
    smoothing_window = 5
    smoothing_polyorder = 2
    if len(y_enhanced) >= smoothing_window:
        y_enhanced = signal.savgol_filter(
            y_enhanced, smoothing_window, smoothing_polyorder
        )

    # Normalise last so the returned signal peaks at full scale.
    y_final = librosa.util.normalize(y_enhanced)

    return y_final, sr

def create_voice_preset(preset_name):
    """Return the settings dict for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with ``'pitch_shift'``, ``'harmonic_boost'`` and
        ``'breathiness'`` entries, or ``None`` when the name is not a
        known preset.
    """
    setting_names = ('pitch_shift', 'harmonic_boost', 'breathiness')

    # One row per preset, values in the same order as setting_names.
    preset_rows = {
        'Young Female': (4.0, 0.3, 0.15),
        'Mature Female': (3.0, 0.2, 0.1),
        'Soft Female': (3.5, 0.25, 0.2),
    }

    row = preset_rows.get(preset_name)
    if row is None:
        return None
    return dict(zip(setting_names, row))

# --- Streamlit UI -----------------------------------------------------------
st.title("Improved Female Voice Converter")

uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )

    # 'Custom' exposes the three settings as sliders; every other choice maps
    # to a fixed preset dict.
    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
            'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            # The upload is written to disk only when a conversion is actually
            # requested. Streamlit re-runs this script on every widget
            # interaction, so writing it earlier left one orphaned temp file
            # per rerun.
            # Keep the upload's own extension instead of always claiming '.wav'.
            suffix = os.path.splitext(uploaded_file.name)[1] or '.wav'
            tmp_path = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                # Encode to WAV in memory and hand the raw bytes to both widgets
                # so neither depends on the buffer's current position.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                # Display audio player
                st.audio(wav_bytes, format='audio/wav')

                # Download button
                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )

            except Exception as e:
                # Top-level UI boundary: report any failure to the user.
                st.error(f"Error processing audio: {str(e)}")
            finally:
                # Best-effort cleanup of the temp file; a failed delete must
                # not mask the conversion result or error shown above.
                if tmp_path is not None:
                    try:
                        os.remove(tmp_path)
                    except OSError:
                        pass

st.markdown("""
### Tips for Best Results:
1. Use high-quality input audio with clear speech
2. Start with presets and adjust if needed
3. Keep pitch shift between 3-5 for most natural results
4. Use minimal breathiness (0.1-0.2) for realistic sound
5. Record in a quiet environment with minimal background noise

### Recommended Settings:
- For younger female voice: pitch shift 4.0, harmonic boost 0.3
- For mature female voice: pitch shift 3.0, harmonic boost 0.2
- For soft female voice: pitch shift 3.5, harmonic boost 0.25
""")