-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhello.py
More file actions
124 lines (89 loc) · 4.01 KB
/
hello.py
File metadata and controls
124 lines (89 loc) · 4.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import fitz
from transformers import VitsModel, AutoTokenizer
import torch
import soundfile as sf
from flask import Flask, render_template, request, send_file
import os
from werkzeug.utils import secure_filename
import pytesseract
from pytesseract import image_to_string
import numpy as np
import pytesseract
import os
from scipy.signal import resample
from PIL import Image
# Absolute path to the Tesseract OCR binary (macOS Homebrew install).
# NOTE(review): machine-specific hard-coded path — consider reading this
# from an environment variable so the app runs on other machines.
pytesseract.pytesseract.tesseract_cmd ='/opt/homebrew/Cellar/tesseract/5.3.4_1/bin/tesseract'
def converting_pdf_into_images(file_path):
    """Render every page of the PDF at *file_path* to a PNG image.

    One file is written per page, to instance/htmlfi/intermediate{i}.png,
    where i is the zero-based page index.

    Args:
        file_path: Path to a PDF file readable by PyMuPDF (fitz).
    """
    # Ensure the output directory exists; the original crashed on a fresh
    # checkout where instance/htmlfi was missing.
    os.makedirs("instance/htmlfi", exist_ok=True)
    with fitz.open(file_path) as doc:
        # enumerate(doc) already yields each page object; the original
        # redundantly re-loaded the same page with doc.load_page(i).
        for i, page in enumerate(doc):
            pix = page.get_pixmap()
            pix.save(f"instance/htmlfi/intermediate{i}.png")
def converting_to_audio(file_paths):
    """OCR Hindi text from the image at *file_paths* and synthesize speech.

    Args:
        file_paths: Path to an image file containing Hindi text.

    Returns:
        (waveform, model) on success, where waveform is the VITS output
        tensor and model the loaded VitsModel. When no Hindi text is
        recognized, returns (np.zeros(5), np.zeros(5)) — a sentinel the
        callers detect via ``(output <= 0).all()``.
    """
    hindi_text = pytesseract.image_to_string(Image.open(file_paths), lang='hin')
    print(hindi_text)
    # Tesseract returns whitespace/newlines (truthy) for blank images, so
    # strip before testing; the original `if hindi_text:` would then run the
    # TTS model on effectively empty text.
    if hindi_text.strip():
        model = VitsModel.from_pretrained("facebook/mms-tts-hin")
        tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-hin")
        inputs = tokenizer(hindi_text, return_tensors="pt")
        with torch.no_grad():
            output = model(**inputs).waveform
        return output, model
    return np.zeros(5), np.zeros(5)
# Flask application serving the PDF/PNG -> Hindi-speech conversion web UI.
app = Flask(__name__)
@app.route("/")
def upload_form():
return render_template("index.html")
@app.route("/uploader", methods=['POST', 'GET'])
def upload_file():
if request.method == 'POST':
uploaded_file = request.files['files']
if uploaded_file is None:
return "Please select a file to upload."
filename = secure_filename(uploaded_file.filename)
checker=filename
print(filename)
x,y=checker.split(".")
print(y)
os.makedirs(os.path.join(app.instance_path, 'htmlinputfi'), exist_ok=True)
os.makedirs(os.path.join(app.instance_path,'htmloutfi'),exist_ok=True)
file_path = os.path.join(app.instance_path, 'htmlinputfi', filename)
uploaded_file.save(file_path)
if(y =='png'):
output,model= converting_to_audio(file_path)
if((output<=0).all()):
return "No hindi text found"
output_np = output.squeeze().cpu().numpy()
output_np = output_np / max(abs(output_np))
sample_rate = model.config.sampling_rate
audio_path=os.join.path(app.instance_path,'htmloutputfi',f'{x}.wav')
sf.write(audio_path,output_np,sample_rate)
audio_path = os.path.join(app.instance_path, 'htmloutfi', f"{x}.wav")
array = np.array([0,0.5,0.75, 1, 2])
sf.write(audio_path,array,1)
converting_pdf_into_images(file_path)
open_file=fitz.open(file_path)
for i in range(open_file.page_count):
output,model=converting_to_audio(f'instance/htmlfi/intermediate{i}.png')
if((output<=0).all()):
return "no hindi text available"
output_np = output.squeeze().cpu().numpy()
output_np = output_np / max(abs(output_np))
sample_rate = model.config.sampling_rate
demo,sample=sf.read(f'instance/htmloutfi/{x}.wav')
demo=np.array(demo,dtype=np.float32)
output_np=np.array(output_np,dtype=np.float32)
concate=np.concatenate((demo,output_np),axis=None)
sf.write(audio_path, concate, sample_rate)
data,samplerate=sf.read(f'instance/htmloutfi/{x}.wav')
speed_factor=request.form.get('speed')
rate=int(len(data)/float(speed_factor))
resampled_audio=resample(data,rate)
sf.write(audio_path,resampled_audio,samplerate)
send_file(audio_path)
return "succesfully converted"
return "vidhi aadindhi ra neethoni vintha naatakam"
if __name__ == "__main__":
app.run(port=8000, debug=True)