Migrating data from multiple spreadsheets to multiple text files
This is like the reverse functionality of Text file to spreadsheet.
One or multiple .xlsx
files in the path of the script get opened and the content is split into multiple .txt
files.
For example, say you have two excel files in the folder:
file1.xlsx
file2.xlsx
The output created:
spreadsheet_to_text.py
"""
Reads in .xlsx files from path were the script is located.
Then the data of each column is split into a .txt file
"""
import glob
import openpyxl
from openpyxl.utils import get_column_letter
def get_text_filename(filename: str, column: int)->str:
"""
Creates a text filename based on .xlsx file filename and column
"""
return (filename.rstrip(".xlsx")
+ "_" + get_column_letter(column) + '.txt')
def xlsx_to_txt(filename: str):
"""
Extract data from a .xlsx file in the script folder into
multiple .txt files
"""
workbook = openpyxl.load_workbook(filename)
sheet_names = workbook.sheetnames
sheet = workbook[sheet_names[0]]
for column in range(1, sheet.max_column + 1):
if sheet.cell(row=1, column=column).value:
text_filename = get_text_filename(filename, column)
with open(text_filename, mode='w') as textfile:
for row in range(1, sheet.max_row + 1):
if sheet.cell(column=column, row=row).value:
textfile.writelines(
sheet.cell(column=column, row=row).value + 'n')
def spreadsheet_into_text():
"""main logic for split spreadsheet data into multiple text files"""
for filename in glob.iglob("*.xlsx"):
xlsx_to_txt(filename)
if __name__ == "__main__":
spreadsheet_into_text()
I already incorporated some improvements from Text file to spreadsheet. I wonder how the code can get further improved.
python python-3.x excel
add a comment |
This is like the reverse functionality of Text file to spreadsheet.
One or multiple .xlsx
files in the path of the script get opened and the content is split into multiple .txt
files.
For example, say you have two excel files in the folder:
file1.xlsx
file2.xlsx
The output created:
spreadsheet_to_text.py
"""
Reads in .xlsx files from path were the script is located.
Then the data of each column is split into a .txt file
"""
import glob
import openpyxl
from openpyxl.utils import get_column_letter
def get_text_filename(filename: str, column: int)->str:
"""
Creates a text filename based on .xlsx file filename and column
"""
return (filename.rstrip(".xlsx")
+ "_" + get_column_letter(column) + '.txt')
def xlsx_to_txt(filename: str):
"""
Extract data from a .xlsx file in the script folder into
multiple .txt files
"""
workbook = openpyxl.load_workbook(filename)
sheet_names = workbook.sheetnames
sheet = workbook[sheet_names[0]]
for column in range(1, sheet.max_column + 1):
if sheet.cell(row=1, column=column).value:
text_filename = get_text_filename(filename, column)
with open(text_filename, mode='w') as textfile:
for row in range(1, sheet.max_row + 1):
if sheet.cell(column=column, row=row).value:
textfile.writelines(
sheet.cell(column=column, row=row).value + 'n')
def spreadsheet_into_text():
"""main logic for split spreadsheet data into multiple text files"""
for filename in glob.iglob("*.xlsx"):
xlsx_to_txt(filename)
if __name__ == "__main__":
spreadsheet_into_text()
I already incorporated some improvements from Text file to spreadsheet. I wonder how the code can get further improved.
python python-3.x excel
add a comment |
This is like the reverse functionality of Text file to spreadsheet.
One or multiple .xlsx
files in the path of the script get opened and the content is split into multiple .txt
files.
For example, say you have two excel files in the folder:
file1.xlsx
file2.xlsx
The output created:
spreadsheet_to_text.py
"""
Reads in .xlsx files from path were the script is located.
Then the data of each column is split into a .txt file
"""
import glob
import openpyxl
from openpyxl.utils import get_column_letter
def get_text_filename(filename: str, column: int)->str:
"""
Creates a text filename based on .xlsx file filename and column
"""
return (filename.rstrip(".xlsx")
+ "_" + get_column_letter(column) + '.txt')
def xlsx_to_txt(filename: str):
"""
Extract data from a .xlsx file in the script folder into
multiple .txt files
"""
workbook = openpyxl.load_workbook(filename)
sheet_names = workbook.sheetnames
sheet = workbook[sheet_names[0]]
for column in range(1, sheet.max_column + 1):
if sheet.cell(row=1, column=column).value:
text_filename = get_text_filename(filename, column)
with open(text_filename, mode='w') as textfile:
for row in range(1, sheet.max_row + 1):
if sheet.cell(column=column, row=row).value:
textfile.writelines(
sheet.cell(column=column, row=row).value + 'n')
def spreadsheet_into_text():
"""main logic for split spreadsheet data into multiple text files"""
for filename in glob.iglob("*.xlsx"):
xlsx_to_txt(filename)
if __name__ == "__main__":
spreadsheet_into_text()
I already incorporated some improvements from Text file to spreadsheet. I wonder how the code can get further improved.
python python-3.x excel
This is like the reverse functionality of Text file to spreadsheet.
One or multiple .xlsx
files in the path of the script get opened and the content is split into multiple .txt
files.
For example, say you have two excel files in the folder:
file1.xlsx
file2.xlsx
The output created:
spreadsheet_to_text.py
"""
Reads in .xlsx files from path were the script is located.
Then the data of each column is split into a .txt file
"""
import glob
import openpyxl
from openpyxl.utils import get_column_letter
def get_text_filename(filename: str, column: int)->str:
"""
Creates a text filename based on .xlsx file filename and column
"""
return (filename.rstrip(".xlsx")
+ "_" + get_column_letter(column) + '.txt')
def xlsx_to_txt(filename: str):
"""
Extract data from a .xlsx file in the script folder into
multiple .txt files
"""
workbook = openpyxl.load_workbook(filename)
sheet_names = workbook.sheetnames
sheet = workbook[sheet_names[0]]
for column in range(1, sheet.max_column + 1):
if sheet.cell(row=1, column=column).value:
text_filename = get_text_filename(filename, column)
with open(text_filename, mode='w') as textfile:
for row in range(1, sheet.max_row + 1):
if sheet.cell(column=column, row=row).value:
textfile.writelines(
sheet.cell(column=column, row=row).value + 'n')
def spreadsheet_into_text():
"""main logic for split spreadsheet data into multiple text files"""
for filename in glob.iglob("*.xlsx"):
xlsx_to_txt(filename)
if __name__ == "__main__":
spreadsheet_into_text()
I already incorporated some improvements from Text file to spreadsheet. I wonder how the code can get further improved.
python python-3.x excel
python python-3.x excel
edited Dec 21 '18 at 19:17
alecxe
15k53478
15k53478
asked Dec 19 '18 at 19:29
Sandro4912Sandro4912
955121
955121
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
This looks quite clean in general. May I suggest a few minor improvements:
- I think you could just use
workbook.active
to get the sheet
instead of doing the
rstrip(".xlsx")
which would also right-strip out.sslsx
orsl.xs.ss
and even grab a part of the actual filename:
In [1]: "christmas.xlsx".rstrip(".xlsx")
Out[1]: 'christma'
use
os
module or the beautifulpathlib
to properly extract a filename without an extension:
In [1]: from pathlib import Path
In [2]: Path("christmas.xlsx").resolve().stem
Out[2]: 'christmas'
calculate what you can before the loop instead of inside it. For instance,
sheet.max_row
is something you could just remember in a variable at the top of your function and re-use inside. It's not a lot of savings, but attribute access still has its cost in Python:
max_row = sheet.max_row
something similar is happening when you get the value of a cell twice, instead:
cell_value = sheet.cell(column=column, row=row).value
if cell_value:
textfile.writelines(cell_value + 'n')
it may be a good idea to keep the nestedness at a minimum ("Flat is better than nested.") and would rather check for a reverse condition and use
continue
to move to the next iteration:
for column in range(1, sheet.max_column + 1):
if not sheet.cell(row=1, column=column).value:
continue
text_filename = get_text_filename(filename, column)
Some out-of-the-box ideas:
- feels like if you do it with
pandas.read_excel()
it may just become way more easier and beautiful
thanks for the high quality answer. I really liked how it simplyfies the code. One little thing. I think in the question i already usewritelines
instead ofwrite
?
– Sandro4912
yesterday
@Sandro4912 :) oh sorry, fixed that. Thanks!
– alecxe
yesterday
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210000%2fmigrating-data-from-multiple-spreadsheets-to-multiple-text-files%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
This looks quite clean in general. May I suggest a few minor improvements:
- I think you could just use
workbook.active
to get the sheet
instead of doing the
rstrip(".xlsx")
which would also right-strip out.sslsx
orsl.xs.ss
and even grab a part of the actual filename:
In [1]: "christmas.xlsx".rstrip(".xlsx")
Out[1]: 'christma'
use
os
module or the beautifulpathlib
to properly extract a filename without an extension:
In [1]: from pathlib import Path
In [2]: Path("christmas.xlsx").resolve().stem
Out[2]: 'christmas'
calculate what you can before the loop instead of inside it. For instance,
sheet.max_row
is something you could just remember in a variable at the top of your function and re-use inside. It's not a lot of savings, but attribute access still has its cost in Python:
max_row = sheet.max_row
something similar is happening when you get the value of a cell twice, instead:
cell_value = sheet.cell(column=column, row=row).value
if cell_value:
textfile.writelines(cell_value + 'n')
it may be a good idea to keep the nestedness at a minimum ("Flat is better than nested.") and would rather check for a reverse condition and use
continue
to move to the next iteration:
for column in range(1, sheet.max_column + 1):
if not sheet.cell(row=1, column=column).value:
continue
text_filename = get_text_filename(filename, column)
Some out-of-the-box ideas:
- feels like if you do it with
pandas.read_excel()
it may just become way more easier and beautiful
thanks for the high quality answer. I really liked how it simplyfies the code. One little thing. I think in the question i already usewritelines
instead ofwrite
?
– Sandro4912
yesterday
@Sandro4912 :) oh sorry, fixed that. Thanks!
– alecxe
yesterday
add a comment |
This looks quite clean in general. May I suggest a few minor improvements:
- I think you could just use
workbook.active
to get the sheet
instead of doing the
rstrip(".xlsx")
which would also right-strip out.sslsx
orsl.xs.ss
and even grab a part of the actual filename:
In [1]: "christmas.xlsx".rstrip(".xlsx")
Out[1]: 'christma'
use
os
module or the beautifulpathlib
to properly extract a filename without an extension:
In [1]: from pathlib import Path
In [2]: Path("christmas.xlsx").resolve().stem
Out[2]: 'christmas'
calculate what you can before the loop instead of inside it. For instance,
sheet.max_row
is something you could just remember in a variable at the top of your function and re-use inside. It's not a lot of savings, but attribute access still has its cost in Python:
max_row = sheet.max_row
something similar is happening when you get the value of a cell twice, instead:
cell_value = sheet.cell(column=column, row=row).value
if cell_value:
textfile.writelines(cell_value + 'n')
it may be a good idea to keep the nestedness at a minimum ("Flat is better than nested.") and would rather check for a reverse condition and use
continue
to move to the next iteration:
for column in range(1, sheet.max_column + 1):
if not sheet.cell(row=1, column=column).value:
continue
text_filename = get_text_filename(filename, column)
Some out-of-the-box ideas:
- feels like if you do it with
pandas.read_excel()
it may just become way more easier and beautiful
thanks for the high quality answer. I really liked how it simplyfies the code. One little thing. I think in the question i already usewritelines
instead ofwrite
?
– Sandro4912
yesterday
@Sandro4912 :) oh sorry, fixed that. Thanks!
– alecxe
yesterday
add a comment |
This looks quite clean in general. May I suggest a few minor improvements:
- I think you could just use
workbook.active
to get the sheet
instead of doing the
rstrip(".xlsx")
which would also right-strip out.sslsx
orsl.xs.ss
and even grab a part of the actual filename:
In [1]: "christmas.xlsx".rstrip(".xlsx")
Out[1]: 'christma'
use
os
module or the beautifulpathlib
to properly extract a filename without an extension:
In [1]: from pathlib import Path
In [2]: Path("christmas.xlsx").resolve().stem
Out[2]: 'christmas'
calculate what you can before the loop instead of inside it. For instance,
sheet.max_row
is something you could just remember in a variable at the top of your function and re-use inside. It's not a lot of savings, but attribute access still has its cost in Python:
max_row = sheet.max_row
something similar is happening when you get the value of a cell twice, instead:
cell_value = sheet.cell(column=column, row=row).value
if cell_value:
textfile.writelines(cell_value + 'n')
it may be a good idea to keep the nestedness at a minimum ("Flat is better than nested.") and would rather check for a reverse condition and use
continue
to move to the next iteration:
for column in range(1, sheet.max_column + 1):
if not sheet.cell(row=1, column=column).value:
continue
text_filename = get_text_filename(filename, column)
Some out-of-the-box ideas:
- feels like if you do it with
pandas.read_excel()
it may just become way more easier and beautiful
This looks quite clean in general. May I suggest a few minor improvements:
- I think you could just use
workbook.active
to get the sheet
instead of doing the
rstrip(".xlsx")
which would also right-strip out.sslsx
orsl.xs.ss
and even grab a part of the actual filename:
In [1]: "christmas.xlsx".rstrip(".xlsx")
Out[1]: 'christma'
use
os
module or the beautifulpathlib
to properly extract a filename without an extension:
In [1]: from pathlib import Path
In [2]: Path("christmas.xlsx").resolve().stem
Out[2]: 'christmas'
calculate what you can before the loop instead of inside it. For instance,
sheet.max_row
is something you could just remember in a variable at the top of your function and re-use inside. It's not a lot of savings, but attribute access still has its cost in Python:
max_row = sheet.max_row
something similar is happening when you get the value of a cell twice, instead:
cell_value = sheet.cell(column=column, row=row).value
if cell_value:
textfile.writelines(cell_value + 'n')
it may be a good idea to keep the nestedness at a minimum ("Flat is better than nested.") and would rather check for a reverse condition and use
continue
to move to the next iteration:
for column in range(1, sheet.max_column + 1):
if not sheet.cell(row=1, column=column).value:
continue
text_filename = get_text_filename(filename, column)
Some out-of-the-box ideas:
- feels like if you do it with
pandas.read_excel()
it may just become way more easier and beautiful
edited yesterday
answered Dec 20 '18 at 0:57
alecxealecxe
15k53478
15k53478
thanks for the high quality answer. I really liked how it simplyfies the code. One little thing. I think in the question i already usewritelines
instead ofwrite
?
– Sandro4912
yesterday
@Sandro4912 :) oh sorry, fixed that. Thanks!
– alecxe
yesterday
add a comment |
thanks for the high quality answer. I really liked how it simplyfies the code. One little thing. I think in the question i already usewritelines
instead ofwrite
?
– Sandro4912
yesterday
@Sandro4912 :) oh sorry, fixed that. Thanks!
– alecxe
yesterday
thanks for the high quality answer. I really liked how it simplyfies the code. One little thing. I think in the question i already use
writelines
instead of write
?– Sandro4912
yesterday
thanks for the high quality answer. I really liked how it simplyfies the code. One little thing. I think in the question i already use
writelines
instead of write
?– Sandro4912
yesterday
@Sandro4912 :) oh sorry, fixed that. Thanks!
– alecxe
yesterday
@Sandro4912 :) oh sorry, fixed that. Thanks!
– alecxe
yesterday
add a comment |
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210000%2fmigrating-data-from-multiple-spreadsheets-to-multiple-text-files%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown