-
-
Notifications
You must be signed in to change notification settings - Fork 118
Expand file tree
/
Copy pathpycode.js
More file actions
81 lines (65 loc) · 2.31 KB
/
pycode.js
File metadata and controls
81 lines (65 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*
* Project Name : Visual Python
* Description : GUI-based Python code generator
* File Name : pycode.js
* Author : Black Logic
* Note : Defines constant Python code strings (pip commands, imports, helper function) for the PDF feature
* License : GNU GPLv3 with Visual Python special exception
* Date : 2021. 08. 14
* Change Date :
*/
//============================================================================
// Define constant
//============================================================================
define([
], function() {
    'use strict';

    //========================================================================
    // vpPDF
    //
    // Every constant below is a string of Python / notebook source code.
    // This module only defines and exports the strings; how they are
    // executed is up to the caller (presumably sent to a notebook kernel -
    // confirm against the consumer of this module).
    //
    // NOTE: continuation lines of the multi-line template literals are
    // part of the generated Python source. Their leading whitespace is
    // Python indentation, so they must NOT be re-indented to match the
    // surrounding JavaScript.
    //========================================================================

    /** `!`-prefixed pip command that shows the installed PyMuPDF and nltk packages. */
    const PDF_SHOW = '!pip show PyMuPDF nltk';

    /** `!`-prefixed pip command that installs PyMuPDF (imported in Python as `fitz`). */
    const PDF_INSTALL1 = '!pip install PyMuPDF';

    /** `!`-prefixed pip command that installs nltk. */
    const PDF_INSTALL2 = '!pip install nltk';

    /**
     * Python import block required by PDF_FUNC: pandas, fitz (PyMuPDF) and
     * nltk, followed by a download of the nltk 'punkt' resource.
     */
    const PDF_IMPORT = `import pandas as pd
import fitz
import nltk
nltk.download('punkt')`;

    /**
     * Python source of `vp_pdf_get_sentence(fname_lst)`.
     *
     * The generated function:
     *  - skips every file name whose last '.'-separated part is not 'pdf';
     *  - opens each remaining file with `fitz.open`, reads every page with
     *    `page.get_text('blocks')`, keeps item 4 of each block whose item 6
     *    equals 0 (per PyMuPDF's "blocks" tuple layout these are the text
     *    and the block type, 0 meaning a text block), joins them with '\n'
     *    and splits the result into sentences via `nltk.sent_tokenize`;
     *  - prints the exception and moves on to the next file if anything
     *    inside the `try` fails;
     *  - returns a DataFrame with columns 'fname' (base name after the last
     *    '/') and 'sentence', re-indexed from 0.
     *
     * `get_text` is used instead of the legacy camelCase `getText` alias,
     * which PyMuPDF deprecated and later removed.
     *
     * `'\\n'` is deliberately double-escaped so that the Python source
     * contains the two characters `\n` rather than a real line break.
     */
    const PDF_FUNC = `def vp_pdf_get_sentence(fname_lst):
    '''
    Get sentence from pdf file by PyMuPDF
    '''
    df = pd.DataFrame()
    for fname in fname_lst:
        if fname.split('.')[-1] != 'pdf': continue
        try:
            doc = fitz.open(fname)
            sentence_lst = []
            for page in doc:
                block_lst = page.get_text('blocks')
                text_lst = [block[4] for block in block_lst if block[6] == 0]
                text = '\\n'.join(text_lst)
                sentence_lst.extend([sentence for sentence in nltk.sent_tokenize(text)])
            doc.close()
        except Exception as e:
            print(e)
            continue
        df_doc = pd.DataFrame({
            'fname': fname.split('/')[-1],
            'sentence': sentence_lst
        })
        df = pd.concat([df,df_doc])
    return df.reset_index().drop('index', axis=1)`;

    /**
     * Python snippet that calls the helper on a variable named `pdf_lst`
     * (which must already exist when the snippet runs) and then evaluates
     * `df` on its own line.
     */
    const PDF_CMD = 'df = vp_pdf_get_sentence(pdf_lst)\ndf';

    //========================================================================
    // return value
    //========================================================================
    return {
        PDF_SHOW,
        PDF_INSTALL1,
        PDF_INSTALL2,
        PDF_IMPORT,
        PDF_FUNC,
        PDF_CMD
    };

}); /* function, define */
/* End of file */