Simple quicksort implementation in C++












0












$begingroup$


#include <iostream>    

using std::cout;
using std::endl;

int arr = {3,4,2,1,-5,116,31,4,0};

void QuickSort(int start,int end);
int Partition(int start, int end);
void Swap(int * , int *);
void PrintArr();

int main(int argc, char* argv)
{

PrintArr();

QuickSort(0, sizeof(arr)/sizeof(int));

PrintArr();


return 0;
}

void QuickSort(int start,int end)
{
if(start >= end) return;

int index = Partition(start , end);

QuickSort(start, index - 1);
QuickSort(index + 1, end);
}

int Partition(int start, int end)
{
int index = start;
int pivot = arr[end-1];
for(int i = start;i < end - 1;i++)
{
if(arr[i] <= pivot)
{
Swap(arr + index,arr + i);
index++;
}
}

Swap(arr + index, arr + end - 1);

return index;

}

void Swap(int *a , int *b)
{
if(*a != *b)
{
int temp = *a;
*a = *b;
*b = temp;
}
}

void PrintArr()
{
for(int i = 0;i<sizeof(arr)/sizeof(arr[0]); i++)
cout<<arr[i] <<" ";

cout<<endl;
}









share|improve this question









$endgroup$








  • 1




    $begingroup$
    It would be great if you made some introduction to your code and the algorithm, as well as list any particular concerns you have about the code, if any. If it would be C, it would be pretty great implementation. Unfortunately, without iterators, it's not like standard library algorithm.
    $endgroup$
    – Incomputable
    Aug 28 '16 at 14:08












  • $begingroup$
    Uncommented code, and not quite literal. No mention of Lomuto/Hoare. No motivation for the check in swap(). Jumping between passing pointers and indices with global variable use.
    $endgroup$
    – greybeard
    Aug 28 '16 at 18:16
















0












$begingroup$


#include <iostream>    

using std::cout;
using std::endl;

int arr = {3,4,2,1,-5,116,31,4,0};

void QuickSort(int start,int end);
int Partition(int start, int end);
void Swap(int * , int *);
void PrintArr();

int main(int argc, char* argv)
{

PrintArr();

QuickSort(0, sizeof(arr)/sizeof(int));

PrintArr();


return 0;
}

void QuickSort(int start,int end)
{
if(start >= end) return;

int index = Partition(start , end);

QuickSort(start, index - 1);
QuickSort(index + 1, end);
}

int Partition(int start, int end)
{
int index = start;
int pivot = arr[end-1];
for(int i = start;i < end - 1;i++)
{
if(arr[i] <= pivot)
{
Swap(arr + index,arr + i);
index++;
}
}

Swap(arr + index, arr + end - 1);

return index;

}

void Swap(int *a , int *b)
{
if(*a != *b)
{
int temp = *a;
*a = *b;
*b = temp;
}
}

void PrintArr()
{
for(int i = 0;i<sizeof(arr)/sizeof(arr[0]); i++)
cout<<arr[i] <<" ";

cout<<endl;
}









share|improve this question









$endgroup$








  • 1




    $begingroup$
    It would be great if you made some introduction to your code and the algorithm, as well as list any particular concerns you have about the code, if any. If it would be C, it would be pretty great implementation. Unfortunately, without iterators, it's not like standard library algorithm.
    $endgroup$
    – Incomputable
    Aug 28 '16 at 14:08












  • $begingroup$
    Uncommented code, and not quite literal. No mention of Lomuto/Hoare. No motivation for the check in swap(). Jumping between passing pointers and indices with global variable use.
    $endgroup$
    – greybeard
    Aug 28 '16 at 18:16














0












0








0


1



$begingroup$


#include <iostream>    

using std::cout;
using std::endl;

int arr = {3,4,2,1,-5,116,31,4,0};

void QuickSort(int start,int end);
int Partition(int start, int end);
void Swap(int * , int *);
void PrintArr();

int main(int argc, char* argv)
{

PrintArr();

QuickSort(0, sizeof(arr)/sizeof(int));

PrintArr();


return 0;
}

void QuickSort(int start,int end)
{
if(start >= end) return;

int index = Partition(start , end);

QuickSort(start, index - 1);
QuickSort(index + 1, end);
}

int Partition(int start, int end)
{
int index = start;
int pivot = arr[end-1];
for(int i = start;i < end - 1;i++)
{
if(arr[i] <= pivot)
{
Swap(arr + index,arr + i);
index++;
}
}

Swap(arr + index, arr + end - 1);

return index;

}

void Swap(int *a , int *b)
{
if(*a != *b)
{
int temp = *a;
*a = *b;
*b = temp;
}
}

void PrintArr()
{
for(int i = 0;i<sizeof(arr)/sizeof(arr[0]); i++)
cout<<arr[i] <<" ";

cout<<endl;
}









share|improve this question









$endgroup$




#include <iostream>    

using std::cout;
using std::endl;

int arr = {3,4,2,1,-5,116,31,4,0};

void QuickSort(int start,int end);
int Partition(int start, int end);
void Swap(int * , int *);
void PrintArr();

int main(int argc, char* argv)
{

PrintArr();

QuickSort(0, sizeof(arr)/sizeof(int));

PrintArr();


return 0;
}

void QuickSort(int start,int end)
{
if(start >= end) return;

int index = Partition(start , end);

QuickSort(start, index - 1);
QuickSort(index + 1, end);
}

int Partition(int start, int end)
{
int index = start;
int pivot = arr[end-1];
for(int i = start;i < end - 1;i++)
{
if(arr[i] <= pivot)
{
Swap(arr + index,arr + i);
index++;
}
}

Swap(arr + index, arr + end - 1);

return index;

}

void Swap(int *a , int *b)
{
if(*a != *b)
{
int temp = *a;
*a = *b;
*b = temp;
}
}

void PrintArr()
{
for(int i = 0;i<sizeof(arr)/sizeof(arr[0]); i++)
cout<<arr[i] <<" ";

cout<<endl;
}






c++ algorithm quick-sort






share|improve this question













share|improve this question











share|improve this question




share|improve this question










asked Aug 28 '16 at 14:03









Pranit KothariPranit Kothari

4251132




4251132








  • 1




    $begingroup$
    It would be great if you made some introduction to your code and the algorithm, as well as list any particular concerns you have about the code, if any. If it would be C, it would be pretty great implementation. Unfortunately, without iterators, it's not like standard library algorithm.
    $endgroup$
    – Incomputable
    Aug 28 '16 at 14:08












  • $begingroup$
    Uncommented code, and not quite literal. No mention of Lomuto/Hoare. No motivation for the check in swap(). Jumping between passing pointers and indices with global variable use.
    $endgroup$
    – greybeard
    Aug 28 '16 at 18:16














  • 1




    $begingroup$
    It would be great if you made some introduction to your code and the algorithm, as well as list any particular concerns you have about the code, if any. If it would be C, it would be pretty great implementation. Unfortunately, without iterators, it's not like standard library algorithm.
    $endgroup$
    – Incomputable
    Aug 28 '16 at 14:08












  • $begingroup$
    Uncommented code, and not quite literal. No mention of Lomuto/Hoare. No motivation for the check in swap(). Jumping between passing pointers and indices with global variable use.
    $endgroup$
    – greybeard
    Aug 28 '16 at 18:16








1




1




$begingroup$
It would be great if you made some introduction to your code and the algorithm, as well as list any particular concerns you have about the code, if any. If it would be C, it would be pretty great implementation. Unfortunately, without iterators, it's not like standard library algorithm.
$endgroup$
– Incomputable
Aug 28 '16 at 14:08






$begingroup$
It would be great if you made some introduction to your code and the algorithm, as well as list any particular concerns you have about the code, if any. If it would be C, it would be pretty great implementation. Unfortunately, without iterators, it's not like standard library algorithm.
$endgroup$
– Incomputable
Aug 28 '16 at 14:08














$begingroup$
Uncommented code, and not quite literal. No mention of Lomuto/Hoare. No motivation for the check in swap(). Jumping between passing pointers and indices with global variable use.
$endgroup$
– greybeard
Aug 28 '16 at 18:16




$begingroup$
Uncommented code, and not quite literal. No mention of Lomuto/Hoare. No motivation for the check in swap(). Jumping between passing pointers and indices with global variable use.
$endgroup$
– greybeard
Aug 28 '16 at 18:16










2 Answers
2






active

oldest

votes


















4












$begingroup$

Writing an implementation of quicksort is fairly easy. Writing a good implementation that works at least reasonably well with almost any input is much more difficult.



Your uses a choice of pivot element that's pretty well know for bad behavior. By choosing the last element in a range (or the first) you essentially guarantee that it runs in $O(N^2)$ if the input is sorted. One typical way of preventing this problem is to use a "median of three" to select the pivot element--that is, look at the first, middle and last elements, and use the median of those three as the pivot.



There's one more minor detail there though: you don't just want to choose the median of those three and use it as your pivot. If those three elements are out of order (with respect to each other) you want to sort them, and write them back to the array in their sorted order. This improves behavior (somewhat) during the recursive calls.



Another subtlety that important to correct behavior (rather than just performance) is to use the correct order for your recursive calls. To assure against stack overflow, you always want to sort the smaller of the two partitions first. In conjunction with tail call elimination (which compilers have practiced for years) this can eliminate the possibility of stack overflow. If you want to be even more certain about it, you can change the final recursive call to iteration, so it's not up to the compiler to eliminate the tail recursion.



C++ adds another interesting twist, at least as soon as you start to deal with sorting collections of arbitrary types of objects (rather than just int). In this case, a user may well have written their own swap function that they expect you to use to swap objects of a particular type. Otherwise, you normally want to use std::swap rather than writing a swap of your own. To select between those correctly, you normally want to put using std::swap at the beginning of your partition function, and then use unqualified calls to swap (e.g., do not explicitly call std::swap). This way if the user has written a swap in the same namespace as the type you're sorting (and named it swap, like they should) your sorting routine will find that via ADL, and use it. Otherwise, it'll find std::swap because of the using std::swap;, and use that. This gives the behavior we want: use their swap routine by preference, but use std::swap if they haven't provided one.



A few other points: of course, the standard library has std::sort built in, which is typically implemented as introsort (which is just a quicksort that keeps track of recursion depth, and switches to a heap sort if quicksort is doing poorly for the input it's been given).



This code doesn't really look/feel much like most people expect C++ to be written. Right now, you're mostly using (and passing) indexes into the array. In C you'd more likely use pointers. In C++, you'd probably want to use iterators instead (though you can pass pointers as iterators). In C++ you'd also normally want something like this to be template, so it can be applied to almost any type of object, not just int. In addition, you'd typically allow the user to pass a comparison function so (for example) if they want to sort Person objects that don't support comparison using <, the user can still sort by passing (for example) a lambda expression saying they want to sort by a person's last name and first name (and default to std::less<T>, so if < is defined, it can be used by default, though the user can still pass a different comparison if they want).






share|improve this answer











$endgroup$













  • $begingroup$
    Putting items inspected in pivot selection in order: How does this improve [behaviour …] during the recursive calls? I'd expect the interjacent partition to necessarily do that. Excellent point about std::swap().
    $endgroup$
    – greybeard
    Aug 29 '16 at 5:58










  • $begingroup$
    The point about 'the correct order for your recursive calls' is incomplete, hence wrong: the order itself has absolutely nothing to do with the correct behavior. The incorrect behavior can be avoided (so the correct behavior guaranteed) by performing a recursive call on the shorter partition first and then NOT performing a recursive call on the longer one, but using a tail recursion instead (i.e. processing it at the same level of recursion).
    $endgroup$
    – CiaPan
    Dec 16 '18 at 20:54










  • $begingroup$
    @JerryCoffin You said: To assure against stack overflow, you always want to sort the smaller of the two partitions first. That does not suffice. If you perform both recursive calls, then the maximum recursion depth will depend solely on partitioning pattern (which results from the input order and partitioning algorithm) and not on the choice which subpartition to sort first. The key step, and the reason of choosing the shorter partition first, is NOT recuring on the longer one, either explicitly or by automatic truncation of a tail recursion.
    $endgroup$
    – CiaPan
    yesterday










  • $begingroup$
    @JerryCoffin Nope. Suppose the array has N items and Partition() returns N. Then the first recursive call handles the zero-item partition, the second one handles the N–1 partition. Inside the second call the N-1 array similary gets partitioned into 0 and N-2 parts. The first call handles the trivial, shorter part, the second one handles N-2 items at the second level of recursion. And so on, until the single-item subarray gets handled at the (N-1)-th level of recursion. So ordering the calls alone does not prevent the worst case of recursion depth.
    $endgroup$
    – CiaPan
    20 hours ago










  • $begingroup$
    @JerryCoffin If one draws a tree of calls, corresponding precisely to the results of partitioning at every stage, the ordering the calls corresponds to swapping the left and the right subtree of any chosen node in the tree – but it does not affect the depth of any node, leaves among them. And the depth of each node is precisely the depth of recursion in the corresponding step of the algorithm. The worst-case example from my comment above makes the tree degenerated to a list. Reordering the calls may make the list to zig-zag or to go straight, but it can not reduce its length.
    $endgroup$
    – CiaPan
    20 hours ago





















3












$begingroup$

Global Variables

Whether this is C or C++, global variables such as arr are always a bad idea. Global variables make writing and debugging code very difficult and allow for unintended side affects. The array arr should be defined in main and passed to all of the other functions.



Using std::ANYTHING

You as a software engineer may in your career write your own cin and cout. To do this you would have to have your own namespace so that std::cin and std::cout did not break your code. Including the namespace in the symbolic identified identifies which symbol you are using. This allows you to write programs and libraries that have the most meaningful names.



Use Container Classes

C++ has a great many container classes, some of which are std::vector, std::array, std::queue and std::stack. These container classes reduce the amount of code you need to write, and provide functionality such as the size of the array so that you don't have to write sizeof(arr)/sizeof(arr[0]). It would be much simpler to use the containers provided by the Standard Template Library.






share|improve this answer











$endgroup$













  • $begingroup$
    Actually, this is one of those situations where there's a strong case to be made that to do the job correctly, you must have using std::swap; (or using namespace std;, though that's generally inferior).
    $endgroup$
    – Jerry Coffin
    Aug 28 '16 at 23:25











Your Answer





StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});


}
});














draft saved

draft discarded


















StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f139853%2fsimple-quicksort-implementation-in-c%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown

























2 Answers
2






active

oldest

votes








2 Answers
2






active

oldest

votes









active

oldest

votes






active

oldest

votes









4












$begingroup$

Writing an implementation of quicksort is fairly easy. Writing a good implementation that works at least reasonably well with almost any input is much more difficult.



Your uses a choice of pivot element that's pretty well know for bad behavior. By choosing the last element in a range (or the first) you essentially guarantee that it runs in $O(N^2)$ if the input is sorted. One typical way of preventing this problem is to use a "median of three" to select the pivot element--that is, look at the first, middle and last elements, and use the median of those three as the pivot.



There's one more minor detail there though: you don't just want to choose the median of those three and use it as your pivot. If those three elements are out of order (with respect to each other) you want to sort them, and write them back to the array in their sorted order. This improves behavior (somewhat) during the recursive calls.



Another subtlety that important to correct behavior (rather than just performance) is to use the correct order for your recursive calls. To assure against stack overflow, you always want to sort the smaller of the two partitions first. In conjunction with tail call elimination (which compilers have practiced for years) this can eliminate the possibility of stack overflow. If you want to be even more certain about it, you can change the final recursive call to iteration, so it's not up to the compiler to eliminate the tail recursion.



C++ adds another interesting twist, at least as soon as you start to deal with sorting collections of arbitrary types of objects (rather than just int). In this case, a user may well have written their own swap function that they expect you to use to swap objects of a particular type. Otherwise, you normally want to use std::swap rather than writing a swap of your own. To select between those correctly, you normally want to put using std::swap at the beginning of your partition function, and then use unqualified calls to swap (e.g., do not explicitly call std::swap). This way if the user has written a swap in the same namespace as the type you're sorting (and named it swap, like they should) your sorting routine will find that via ADL, and use it. Otherwise, it'll find std::swap because of the using std::swap;, and use that. This gives the behavior we want: use their swap routine by preference, but use std::swap if they haven't provided one.



A few other points: of course, the standard library has std::sort built in, which is typically implemented as introsort (which is just a quicksort that keeps track of recursion depth, and switches to a heap sort if quicksort is doing poorly for the input it's been given).



This code doesn't really look/feel much like most people expect C++ to be written. Right now, you're mostly using (and passing) indexes into the array. In C you'd more likely use pointers. In C++, you'd probably want to use iterators instead (though you can pass pointers as iterators). In C++ you'd also normally want something like this to be template, so it can be applied to almost any type of object, not just int. In addition, you'd typically allow the user to pass a comparison function so (for example) if they want to sort Person objects that don't support comparison using <, the user can still sort by passing (for example) a lambda expression saying they want to sort by a person's last name and first name (and default to std::less<T>, so if < is defined, it can be used by default, though the user can still pass a different comparison if they want).






share|improve this answer











$endgroup$













  • $begingroup$
    Putting items inspected in pivot selection in order: How does this improve [behaviour …] during the recursive calls? I'd expect the interjacent partition to necessarily do that. Excellent point about std::swap().
    $endgroup$
    – greybeard
    Aug 29 '16 at 5:58










  • $begingroup$
    The point about 'the correct order for your recursive calls' is incomplete, hence wrong: the order itself has absolutely nothing to do with the correct behavior. The incorrect behavior can be avoided (so the correct behavior guaranteed) by performing a recursive call on the shorter partition first and then NOT performing a recursive call on the longer one, but using a tail recursion instead (i.e. processing it at the same level of recursion).
    $endgroup$
    – CiaPan
    Dec 16 '18 at 20:54










  • $begingroup$
    @JerryCoffin You said: To assure against stack overflow, you always want to sort the smaller of the two partitions first. That does not suffice. If you perform both recursive calls, then the maximum recursion depth will depend solely on partitioning pattern (which results from the input order and partitioning algorithm) and not on the choice which subpartition to sort first. The key step, and the reason of choosing the shorter partition first, is NOT recuring on the longer one, either explicitly or by automatic truncation of a tail recursion.
    $endgroup$
    – CiaPan
    yesterday










  • $begingroup$
    @JerryCoffin Nope. Suppose the array has N items and Partition() returns N. Then the first recursive call handles the zero-item partition, the second one handles the N–1 partition. Inside the second call the N-1 array similary gets partitioned into 0 and N-2 parts. The first call handles the trivial, shorter part, the second one handles N-2 items at the second level of recursion. And so on, until the single-item subarray gets handled at the (N-1)-th level of recursion. So ordering the calls alone does not prevent the worst case of recursion depth.
    $endgroup$
    – CiaPan
    20 hours ago










  • $begingroup$
    @JerryCoffin If one draws a tree of calls, corresponding precisely to the results of partitioning at every stage, the ordering the calls corresponds to swapping the left and the right subtree of any chosen node in the tree – but it does not affect the depth of any node, leaves among them. And the depth of each node is precisely the depth of recursion in the corresponding step of the algorithm. The worst-case example from my comment above makes the tree degenerated to a list. Reordering the calls may make the list to zig-zag or to go straight, but it can not reduce its length.
    $endgroup$
    – CiaPan
    20 hours ago


















4












$begingroup$

Writing an implementation of quicksort is fairly easy. Writing a good implementation that works at least reasonably well with almost any input is much more difficult.



Your uses a choice of pivot element that's pretty well know for bad behavior. By choosing the last element in a range (or the first) you essentially guarantee that it runs in $O(N^2)$ if the input is sorted. One typical way of preventing this problem is to use a "median of three" to select the pivot element--that is, look at the first, middle and last elements, and use the median of those three as the pivot.



There's one more minor detail there though: you don't just want to choose the median of those three and use it as your pivot. If those three elements are out of order (with respect to each other) you want to sort them, and write them back to the array in their sorted order. This improves behavior (somewhat) during the recursive calls.



Another subtlety that important to correct behavior (rather than just performance) is to use the correct order for your recursive calls. To assure against stack overflow, you always want to sort the smaller of the two partitions first. In conjunction with tail call elimination (which compilers have practiced for years) this can eliminate the possibility of stack overflow. If you want to be even more certain about it, you can change the final recursive call to iteration, so it's not up to the compiler to eliminate the tail recursion.



C++ adds another interesting twist, at least as soon as you start to deal with sorting collections of arbitrary types of objects (rather than just int). In this case, a user may well have written their own swap function that they expect you to use to swap objects of a particular type. Otherwise, you normally want to use std::swap rather than writing a swap of your own. To select between those correctly, you normally want to put using std::swap at the beginning of your partition function, and then use unqualified calls to swap (e.g., do not explicitly call std::swap). This way if the user has written a swap in the same namespace as the type you're sorting (and named it swap, like they should) your sorting routine will find that via ADL, and use it. Otherwise, it'll find std::swap because of the using std::swap;, and use that. This gives the behavior we want: use their swap routine by preference, but use std::swap if they haven't provided one.



A few other points: of course, the standard library has std::sort built in, which is typically implemented as introsort (which is just a quicksort that keeps track of recursion depth, and switches to a heap sort if quicksort is doing poorly for the input it's been given).



This code doesn't really look/feel much like most people expect C++ to be written. Right now, you're mostly using (and passing) indexes into the array. In C you'd more likely use pointers. In C++, you'd probably want to use iterators instead (though you can pass pointers as iterators). In C++ you'd also normally want something like this to be template, so it can be applied to almost any type of object, not just int. In addition, you'd typically allow the user to pass a comparison function so (for example) if they want to sort Person objects that don't support comparison using <, the user can still sort by passing (for example) a lambda expression saying they want to sort by a person's last name and first name (and default to std::less<T>, so if < is defined, it can be used by default, though the user can still pass a different comparison if they want).






share|improve this answer











$endgroup$













  • $begingroup$
    Putting items inspected in pivot selection in order: How does this improve [behaviour …] during the recursive calls? I'd expect the interjacent partition to necessarily do that. Excellent point about std::swap().
    $endgroup$
    – greybeard
    Aug 29 '16 at 5:58










  • $begingroup$
    The point about 'the correct order for your recursive calls' is incomplete, hence wrong: the order itself has absolutely nothing to do with the correct behavior. The incorrect behavior can be avoided (so the correct behavior guaranteed) by performing a recursive call on the shorter partition first and then NOT performing a recursive call on the longer one, but using a tail recursion instead (i.e. processing it at the same level of recursion).
    $endgroup$
    – CiaPan
    Dec 16 '18 at 20:54










  • $begingroup$
    @JerryCoffin You said: To assure against stack overflow, you always want to sort the smaller of the two partitions first. That does not suffice. If you perform both recursive calls, then the maximum recursion depth will depend solely on partitioning pattern (which results from the input order and partitioning algorithm) and not on the choice which subpartition to sort first. The key step, and the reason of choosing the shorter partition first, is NOT recuring on the longer one, either explicitly or by automatic truncation of a tail recursion.
    $endgroup$
    – CiaPan
    yesterday










  • $begingroup$
    @JerryCoffin Nope. Suppose the array has N items and Partition() returns N. Then the first recursive call handles the zero-item partition, the second one handles the N–1 partition. Inside the second call the N-1 array similary gets partitioned into 0 and N-2 parts. The first call handles the trivial, shorter part, the second one handles N-2 items at the second level of recursion. And so on, until the single-item subarray gets handled at the (N-1)-th level of recursion. So ordering the calls alone does not prevent the worst case of recursion depth.
    $endgroup$
    – CiaPan
    20 hours ago










  • $begingroup$
    @JerryCoffin If one draws a tree of calls, corresponding precisely to the results of partitioning at every stage, the ordering the calls corresponds to swapping the left and the right subtree of any chosen node in the tree – but it does not affect the depth of any node, leaves among them. And the depth of each node is precisely the depth of recursion in the corresponding step of the algorithm. The worst-case example from my comment above makes the tree degenerated to a list. Reordering the calls may make the list to zig-zag or to go straight, but it can not reduce its length.
    $endgroup$
    – CiaPan
    20 hours ago
















4












4








4





$begingroup$

Writing an implementation of quicksort is fairly easy. Writing a good implementation that works at least reasonably well with almost any input is much more difficult.



Your uses a choice of pivot element that's pretty well know for bad behavior. By choosing the last element in a range (or the first) you essentially guarantee that it runs in $O(N^2)$ if the input is sorted. One typical way of preventing this problem is to use a "median of three" to select the pivot element--that is, look at the first, middle and last elements, and use the median of those three as the pivot.



There's one more minor detail there though: you don't just want to choose the median of those three and use it as your pivot. If those three elements are out of order (with respect to each other) you want to sort them, and write them back to the array in their sorted order. This improves behavior (somewhat) during the recursive calls.



Another subtlety that important to correct behavior (rather than just performance) is to use the correct order for your recursive calls. To assure against stack overflow, you always want to sort the smaller of the two partitions first. In conjunction with tail call elimination (which compilers have practiced for years) this can eliminate the possibility of stack overflow. If you want to be even more certain about it, you can change the final recursive call to iteration, so it's not up to the compiler to eliminate the tail recursion.



C++ adds another interesting twist, at least as soon as you start to deal with sorting collections of arbitrary types of objects (rather than just int). In this case, a user may well have written their own swap function that they expect you to use to swap objects of a particular type. Otherwise, you normally want to use std::swap rather than writing a swap of your own. To select between those correctly, you normally want to put using std::swap at the beginning of your partition function, and then use unqualified calls to swap (e.g., do not explicitly call std::swap). This way if the user has written a swap in the same namespace as the type you're sorting (and named it swap, like they should) your sorting routine will find that via ADL, and use it. Otherwise, it'll find std::swap because of the using std::swap;, and use that. This gives the behavior we want: use their swap routine by preference, but use std::swap if they haven't provided one.



A few other points: of course, the standard library has std::sort built in, which is typically implemented as introsort (which is just a quicksort that keeps track of recursion depth, and switches to a heap sort if quicksort is doing poorly for the input it's been given).



This code doesn't really look/feel much like most people expect C++ to be written. Right now, you're mostly using (and passing) indexes into the array. In C you'd more likely use pointers. In C++, you'd probably want to use iterators instead (though you can pass pointers as iterators). In C++ you'd also normally want something like this to be template, so it can be applied to almost any type of object, not just int. In addition, you'd typically allow the user to pass a comparison function so (for example) if they want to sort Person objects that don't support comparison using <, the user can still sort by passing (for example) a lambda expression saying they want to sort by a person's last name and first name (and default to std::less<T>, so if < is defined, it can be used by default, though the user can still pass a different comparison if they want).






share|improve this answer











$endgroup$



Writing an implementation of quicksort is fairly easy. Writing a good implementation that works at least reasonably well with almost any input is much more difficult.



Your uses a choice of pivot element that's pretty well know for bad behavior. By choosing the last element in a range (or the first) you essentially guarantee that it runs in $O(N^2)$ if the input is sorted. One typical way of preventing this problem is to use a "median of three" to select the pivot element--that is, look at the first, middle and last elements, and use the median of those three as the pivot.



There's one more minor detail there though: you don't just want to choose the median of those three and use it as your pivot. If those three elements are out of order (with respect to each other) you want to sort them, and write them back to the array in their sorted order. This improves behavior (somewhat) during the recursive calls.



Another subtlety that important to correct behavior (rather than just performance) is to use the correct order for your recursive calls. To assure against stack overflow, you always want to sort the smaller of the two partitions first. In conjunction with tail call elimination (which compilers have practiced for years) this can eliminate the possibility of stack overflow. If you want to be even more certain about it, you can change the final recursive call to iteration, so it's not up to the compiler to eliminate the tail recursion.



C++ adds another interesting twist, at least as soon as you start to deal with sorting collections of arbitrary types of objects (rather than just int). In this case, a user may well have written their own swap function that they expect you to use to swap objects of a particular type. Otherwise, you normally want to use std::swap rather than writing a swap of your own. To select between those correctly, you normally want to put using std::swap at the beginning of your partition function, and then use unqualified calls to swap (e.g., do not explicitly call std::swap). This way if the user has written a swap in the same namespace as the type you're sorting (and named it swap, like they should) your sorting routine will find that via ADL, and use it. Otherwise, it'll find std::swap because of the using std::swap;, and use that. This gives the behavior we want: use their swap routine by preference, but use std::swap if they haven't provided one.



A few other points: of course, the standard library has std::sort built in, which is typically implemented as introsort (which is just a quicksort that keeps track of recursion depth, and switches to a heap sort if quicksort is doing poorly for the input it's been given).



This code doesn't really look/feel much like most people expect C++ to be written. Right now, you're mostly using (and passing) indexes into the array. In C you'd more likely use pointers. In C++, you'd probably want to use iterators instead (though you can pass pointers as iterators). In C++ you'd also normally want something like this to be template, so it can be applied to almost any type of object, not just int. In addition, you'd typically allow the user to pass a comparison function so (for example) if they want to sort Person objects that don't support comparison using <, the user can still sort by passing (for example) a lambda expression saying they want to sort by a person's last name and first name (and default to std::less<T>, so if < is defined, it can be used by default, though the user can still pass a different comparison if they want).







share|improve this answer














share|improve this answer



share|improve this answer








edited 13 hours ago

























answered Aug 29 '16 at 0:12









Jerry CoffinJerry Coffin

28.4k462129




28.4k462129












  • $begingroup$
    Putting items inspected in pivot selection in order: How does this improve [behaviour …] during the recursive calls? I'd expect the interjacent partition to necessarily do that. Excellent point about std::swap().
    $endgroup$
    – greybeard
    Aug 29 '16 at 5:58










  • $begingroup$
    The point about 'the correct order for your recursive calls' is incomplete, hence wrong: the order itself has absolutely nothing to do with the correct behavior. The incorrect behavior can be avoided (so the correct behavior guaranteed) by performing a recursive call on the shorter partition first and then NOT performing a recursive call on the longer one, but using a tail recursion instead (i.e. processing it at the same level of recursion).
    $endgroup$
    – CiaPan
    Dec 16 '18 at 20:54










  • $begingroup$
    @JerryCoffin You said: To assure against stack overflow, you always want to sort the smaller of the two partitions first. That does not suffice. If you perform both recursive calls, then the maximum recursion depth will depend solely on partitioning pattern (which results from the input order and partitioning algorithm) and not on the choice which subpartition to sort first. The key step, and the reason of choosing the shorter partition first, is NOT recuring on the longer one, either explicitly or by automatic truncation of a tail recursion.
    $endgroup$
    – CiaPan
    yesterday










  • $begingroup$
    @JerryCoffin Nope. Suppose the array has N items and Partition() returns N. Then the first recursive call handles the zero-item partition, the second one handles the N–1 partition. Inside the second call the N-1 array similary gets partitioned into 0 and N-2 parts. The first call handles the trivial, shorter part, the second one handles N-2 items at the second level of recursion. And so on, until the single-item subarray gets handled at the (N-1)-th level of recursion. So ordering the calls alone does not prevent the worst case of recursion depth.
    $endgroup$
    – CiaPan
    20 hours ago










  • $begingroup$
    @JerryCoffin If one draws a tree of calls, corresponding precisely to the results of partitioning at every stage, the ordering the calls corresponds to swapping the left and the right subtree of any chosen node in the tree – but it does not affect the depth of any node, leaves among them. And the depth of each node is precisely the depth of recursion in the corresponding step of the algorithm. The worst-case example from my comment above makes the tree degenerated to a list. Reordering the calls may make the list to zig-zag or to go straight, but it can not reduce its length.
    $endgroup$
    – CiaPan
    20 hours ago




















  • $begingroup$
    Putting items inspected in pivot selection in order: How does this improve [behaviour …] during the recursive calls? I'd expect the interjacent partition to necessarily do that. Excellent point about std::swap().
    $endgroup$
    – greybeard
    Aug 29 '16 at 5:58










  • $begingroup$
    The point about 'the correct order for your recursive calls' is incomplete, hence wrong: the order itself has absolutely nothing to do with the correct behavior. The incorrect behavior can be avoided (so the correct behavior guaranteed) by performing a recursive call on the shorter partition first and then NOT performing a recursive call on the longer one, but using a tail recursion instead (i.e. processing it at the same level of recursion).
    $endgroup$
    – CiaPan
    Dec 16 '18 at 20:54










  • $begingroup$
    @JerryCoffin You said: To assure against stack overflow, you always want to sort the smaller of the two partitions first. That does not suffice. If you perform both recursive calls, then the maximum recursion depth will depend solely on partitioning pattern (which results from the input order and partitioning algorithm) and not on the choice which subpartition to sort first. The key step, and the reason of choosing the shorter partition first, is NOT recuring on the longer one, either explicitly or by automatic truncation of a tail recursion.
    $endgroup$
    – CiaPan
    yesterday










  • $begingroup$
    @JerryCoffin Nope. Suppose the array has N items and Partition() returns N. Then the first recursive call handles the zero-item partition, the second one handles the N–1 partition. Inside the second call the N-1 array similary gets partitioned into 0 and N-2 parts. The first call handles the trivial, shorter part, the second one handles N-2 items at the second level of recursion. And so on, until the single-item subarray gets handled at the (N-1)-th level of recursion. So ordering the calls alone does not prevent the worst case of recursion depth.
    $endgroup$
    – CiaPan
    20 hours ago










  • $begingroup$
    @JerryCoffin If one draws a tree of calls, corresponding precisely to the results of partitioning at every stage, the ordering the calls corresponds to swapping the left and the right subtree of any chosen node in the tree – but it does not affect the depth of any node, leaves among them. And the depth of each node is precisely the depth of recursion in the corresponding step of the algorithm. The worst-case example from my comment above makes the tree degenerated to a list. Reordering the calls may make the list to zig-zag or to go straight, but it can not reduce its length.
    $endgroup$
    – CiaPan
    20 hours ago


















$begingroup$
Putting items inspected in pivot selection in order: How does this improve [behaviour …] during the recursive calls? I'd expect the interjacent partition to necessarily do that. Excellent point about std::swap().
$endgroup$
– greybeard
Aug 29 '16 at 5:58




$begingroup$
Putting items inspected in pivot selection in order: How does this improve [behaviour …] during the recursive calls? I'd expect the interjacent partition to necessarily do that. Excellent point about std::swap().
$endgroup$
– greybeard
Aug 29 '16 at 5:58












$begingroup$
The point about 'the correct order for your recursive calls' is incomplete, hence wrong: the order itself has absolutely nothing to do with the correct behavior. The incorrect behavior can be avoided (so the correct behavior guaranteed) by performing a recursive call on the shorter partition first and then NOT performing a recursive call on the longer one, but using a tail recursion instead (i.e. processing it at the same level of recursion).
$endgroup$
– CiaPan
Dec 16 '18 at 20:54




$begingroup$
The point about 'the correct order for your recursive calls' is incomplete, hence wrong: the order itself has absolutely nothing to do with the correct behavior. The incorrect behavior can be avoided (so the correct behavior guaranteed) by performing a recursive call on the shorter partition first and then NOT performing a recursive call on the longer one, but using a tail recursion instead (i.e. processing it at the same level of recursion).
$endgroup$
– CiaPan
Dec 16 '18 at 20:54












$begingroup$
@JerryCoffin You said: To assure against stack overflow, you always want to sort the smaller of the two partitions first. That does not suffice. If you perform both recursive calls, then the maximum recursion depth will depend solely on partitioning pattern (which results from the input order and partitioning algorithm) and not on the choice which subpartition to sort first. The key step, and the reason of choosing the shorter partition first, is NOT recuring on the longer one, either explicitly or by automatic truncation of a tail recursion.
$endgroup$
– CiaPan
yesterday




$begingroup$
@JerryCoffin You said: To assure against stack overflow, you always want to sort the smaller of the two partitions first. That does not suffice. If you perform both recursive calls, then the maximum recursion depth will depend solely on partitioning pattern (which results from the input order and partitioning algorithm) and not on the choice which subpartition to sort first. The key step, and the reason of choosing the shorter partition first, is NOT recuring on the longer one, either explicitly or by automatic truncation of a tail recursion.
$endgroup$
– CiaPan
yesterday












$begingroup$
@JerryCoffin Nope. Suppose the array has N items and Partition() returns N. Then the first recursive call handles the zero-item partition, the second one handles the N–1 partition. Inside the second call the N-1 array similary gets partitioned into 0 and N-2 parts. The first call handles the trivial, shorter part, the second one handles N-2 items at the second level of recursion. And so on, until the single-item subarray gets handled at the (N-1)-th level of recursion. So ordering the calls alone does not prevent the worst case of recursion depth.
$endgroup$
– CiaPan
20 hours ago




$begingroup$
@JerryCoffin Nope. Suppose the array has N items and Partition() returns N. Then the first recursive call handles the zero-item partition, the second one handles the N–1 partition. Inside the second call the N-1 array similary gets partitioned into 0 and N-2 parts. The first call handles the trivial, shorter part, the second one handles N-2 items at the second level of recursion. And so on, until the single-item subarray gets handled at the (N-1)-th level of recursion. So ordering the calls alone does not prevent the worst case of recursion depth.
$endgroup$
– CiaPan
20 hours ago












$begingroup$
@JerryCoffin If one draws a tree of calls, corresponding precisely to the results of partitioning at every stage, the ordering the calls corresponds to swapping the left and the right subtree of any chosen node in the tree – but it does not affect the depth of any node, leaves among them. And the depth of each node is precisely the depth of recursion in the corresponding step of the algorithm. The worst-case example from my comment above makes the tree degenerated to a list. Reordering the calls may make the list to zig-zag or to go straight, but it can not reduce its length.
$endgroup$
– CiaPan
20 hours ago






$begingroup$
@JerryCoffin If one draws a tree of calls, corresponding precisely to the results of partitioning at every stage, the ordering the calls corresponds to swapping the left and the right subtree of any chosen node in the tree – but it does not affect the depth of any node, leaves among them. And the depth of each node is precisely the depth of recursion in the corresponding step of the algorithm. The worst-case example from my comment above makes the tree degenerated to a list. Reordering the calls may make the list to zig-zag or to go straight, but it can not reduce its length.
$endgroup$
– CiaPan
20 hours ago















3












$begingroup$

Global Variables

Whether this is C or C++, global variables such as arr are always a bad idea. Global variables make writing and debugging code very difficult and allow for unintended side affects. The array arr should be defined in main and passed to all of the other functions.



Using std::ANYTHING

You as a software engineer may in your career write your own cin and cout. To do this you would have to have your own namespace so that std::cin and std::cout did not break your code. Including the namespace in the symbolic identified identifies which symbol you are using. This allows you to write programs and libraries that have the most meaningful names.



Use Container Classes

C++ has a great many container classes, some of which are std::vector, std::array, std::queue and std::stack. These container classes reduce the amount of code you need to write, and provide functionality such as the size of the array so that you don't have to write sizeof(arr)/sizeof(arr[0]). It would be much simpler to use the containers provided by the Standard Template Library.






share|improve this answer











$endgroup$













  • $begingroup$
    Actually, this is one of those situations where there's a strong case to be made that to do the job correctly, you must have using std::swap; (or using namespace std;, though that's generally inferior).
    $endgroup$
    – Jerry Coffin
    Aug 28 '16 at 23:25
















3












$begingroup$

Global Variables

Whether this is C or C++, global variables such as arr are always a bad idea. Global variables make writing and debugging code very difficult and allow for unintended side affects. The array arr should be defined in main and passed to all of the other functions.



Using std::ANYTHING

You as a software engineer may in your career write your own cin and cout. To do this you would have to have your own namespace so that std::cin and std::cout did not break your code. Including the namespace in the symbolic identified identifies which symbol you are using. This allows you to write programs and libraries that have the most meaningful names.



Use Container Classes

C++ has a great many container classes, some of which are std::vector, std::array, std::queue and std::stack. These container classes reduce the amount of code you need to write, and provide functionality such as the size of the array so that you don't have to write sizeof(arr)/sizeof(arr[0]). It would be much simpler to use the containers provided by the Standard Template Library.






share|improve this answer











$endgroup$













  • $begingroup$
    Actually, this is one of those situations where there's a strong case to be made that to do the job correctly, you must have using std::swap; (or using namespace std;, though that's generally inferior).
    $endgroup$
    – Jerry Coffin
    Aug 28 '16 at 23:25














3












3








3





$begingroup$

Global Variables

Whether this is C or C++, global variables such as arr are always a bad idea. Global variables make writing and debugging code very difficult and allow for unintended side affects. The array arr should be defined in main and passed to all of the other functions.



Using std::ANYTHING

You as a software engineer may in your career write your own cin and cout. To do this you would have to have your own namespace so that std::cin and std::cout did not break your code. Including the namespace in the symbolic identified identifies which symbol you are using. This allows you to write programs and libraries that have the most meaningful names.



Use Container Classes

C++ has a great many container classes, some of which are std::vector, std::array, std::queue and std::stack. These container classes reduce the amount of code you need to write, and provide functionality such as the size of the array so that you don't have to write sizeof(arr)/sizeof(arr[0]). It would be much simpler to use the containers provided by the Standard Template Library.






share|improve this answer











$endgroup$



Global Variables

Whether this is C or C++, global variables such as arr are always a bad idea. Global variables make writing and debugging code very difficult and allow for unintended side affects. The array arr should be defined in main and passed to all of the other functions.



Using std::ANYTHING

You as a software engineer may in your career write your own cin and cout. To do this you would have to have your own namespace so that std::cin and std::cout did not break your code. Including the namespace in the symbolic identified identifies which symbol you are using. This allows you to write programs and libraries that have the most meaningful names.



Use Container Classes

C++ has a great many container classes, some of which are std::vector, std::array, std::queue and std::stack. These container classes reduce the amount of code you need to write, and provide functionality such as the size of the array so that you don't have to write sizeof(arr)/sizeof(arr[0]). It would be much simpler to use the containers provided by the Standard Template Library.







share|improve this answer














share|improve this answer



share|improve this answer








edited Aug 28 '16 at 14:52

























answered Aug 28 '16 at 14:39









pacmaninbwpacmaninbw

5,25321537




5,25321537












  • $begingroup$
    Actually, this is one of those situations where there's a strong case to be made that to do the job correctly, you must have using std::swap; (or using namespace std;, though that's generally inferior).
    $endgroup$
    – Jerry Coffin
    Aug 28 '16 at 23:25


















  • $begingroup$
    Actually, this is one of those situations where there's a strong case to be made that to do the job correctly, you must have using std::swap; (or using namespace std;, though that's generally inferior).
    $endgroup$
    – Jerry Coffin
    Aug 28 '16 at 23:25
















$begingroup$
Actually, this is one of those situations where there's a strong case to be made that to do the job correctly, you must have using std::swap; (or using namespace std;, though that's generally inferior).
$endgroup$
– Jerry Coffin
Aug 28 '16 at 23:25




$begingroup$
Actually, this is one of those situations where there's a strong case to be made that to do the job correctly, you must have using std::swap; (or using namespace std;, though that's generally inferior).
$endgroup$
– Jerry Coffin
Aug 28 '16 at 23:25


















draft saved

draft discarded




















































Thanks for contributing an answer to Code Review Stack Exchange!


  • Please be sure to answer the question. Provide details and share your research!

But avoid



  • Asking for help, clarification, or responding to other answers.

  • Making statements based on opinion; back them up with references or personal experience.


Use MathJax to format equations. MathJax reference.


To learn more, see our tips on writing great answers.




draft saved


draft discarded














StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f139853%2fsimple-quicksort-implementation-in-c%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown





















































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown

































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown







Popular posts from this blog

How to reconfigure Docker Trusted Registry 2.x.x to use CEPH FS mount instead of NFS and other traditional...

is 'sed' thread safe

How to make a Squid Proxy server?