Why are Java and C++ slower than PHP in Android? [on hold]











-1














I wrote PHP code that extracts text snippets around matched search keywords for use with my search engine. Here is the code:



/**
 * Builds the context shown AFTER a hit.
 *
 * Starting from $AppendText, keeps appending entries of $all_words (from index
 * $row_next upwards) until the text holds at least $maxWords words or the list
 * runs out, then keeps only the first $maxWords whitespace-separated tokens and
 * appends "..".
 *
 * @param string|null $AppendText Text that directly follows the hit.
 * @param array       $all_words  Text segments keyed by consecutive integer position.
 * @param int         $row_next   Position of the next segment that may be appended.
 * @param int         $maxWords   Maximum number of tokens to keep.
 *
 * @return string The truncated context followed by "..", or just ".." when there is no context.
 */
function next_text($AppendText, $all_words, $row_next, $maxWords)
{
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;

    // "{1,0}" is not a valid quantifier, so there is nothing to extract.
    if ($maxWords < 1) {
        return '..';
    }

    // Iterative form of the original tail recursion.
    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' ' . $all_words[$row_next];
        $row_next++;
    }

    // ltrim: an empty start text plus ' word' would otherwise defeat the ^\S+ anchor.
    // No "u" flag is needed: whitespace bytes never occur inside a UTF-8 sequence.
    if (preg_match('/^(?:\S+\s*){1,' . $maxWords . '}/', ltrim($AppendText), $match) !== 1) {
        return '..';
    }

    return $match[0] . '..';
}


/**
 * Builds the context shown BEFORE a hit.
 *
 * Starting from $AppendText, keeps prepending entries of $all_words (from index
 * $row_back downwards) until the text holds at least $maxWords words or the
 * list runs out, then keeps only the last $maxWords whitespace-separated tokens
 * and prefixes them with "..".
 *
 * @param string|null $AppendText Text that directly precedes the hit.
 * @param array       $all_words  Text segments keyed by consecutive integer position.
 * @param int         $row_back   Position of the next segment that may be prepended.
 * @param int         $maxWords   Maximum number of tokens to keep.
 *
 * @return string ".." followed by the truncated context, or just ".." when there is no context.
 */
function back_text($AppendText, $all_words, $row_back, $maxWords)
{
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;

    // "{1,0}" is not a valid quantifier, so there is nothing to extract.
    if ($maxWords < 1) {
        return '..';
    }

    // Iterative form of the original tail recursion.
    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back] . ' ' . $AppendText;
        $row_back--;
    }

    // The atomic group stops the engine from backtracking into a token while it
    // looks for the start of the last $maxWords tokens.
    if (preg_match('/(?>\S+\s*){1,' . $maxWords . '}$/', $AppendText, $match) !== 1) {
        return '..';
    }

    return '..' . $match[0];
}

if (!function_exists('mb_str_word_count')) {
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a run of letters, combining marks, digits and apostrophes.
     * Combining marks are included so that vocalised Arabic words are not cut
     * at every diacritic.
     *
     * @param string $string   Text to analyse.
     * @param int    $format   0 returns the number of words; any other value returns the list of words.
     * @param string $charlist Unused; kept for signature compatibility with str_word_count().
     *
     * @return int|array Word count for format 0, otherwise the list of words.
     */
    function mb_str_word_count($string, $format = 0, $charlist = '')
    {
        $string = trim((string) $string);

        // Compare against '' explicitly: empty("0") is true and would drop the word "0".
        if ($string === '') {
            $words = [];
        } else {
            // NO_EMPTY: leading or trailing punctuation must not yield an empty "word".
            $words = preg_split('~[^\p{L}\p{M}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Invalid UTF-8 or a PCRE failure: report no words rather than count(false).
                $words = [];
            }
        }

        if ((int) $format === 0) {
            return count($words);
        }

        return $words;
    }
}

/**
 * Replaces every run of <br> tags (any case, optional "/", optional whitespace
 * between and after them) with a single newline.
 *
 * @param string $txt Text that may contain <br> tags.
 *
 * @return string The text with line-break tags turned into "\n".
 */
function clean($txt)
{
    // A single "+" pattern covers both the "two or more" and the "single tag" case.
    $cleaned = preg_replace('{(?:<br\s*/?>\s*)+}i', "\n", (string) $txt);

    // preg_replace() returns null on a PCRE error; fall back to the input unchanged.
    return $cleaned === null ? (string) $txt : $cleaned;
}



/**
 * Builds "keyword in context" HTML snippets for every occurrence of a search
 * query inside an HTML document.
 *
 * The query is turned into a tolerant regular expression (Arabic letter
 * variants, optional diacritics and tatweel, Western/Arabic/Persian digits),
 * the document is reduced to plain text with newlines at tag boundaries, and
 * each match is wrapped in a link and surrounded by up to $maxWords words of
 * context on each side.
 *
 * @param string      $str      HTML (or plain text) to search.
 * @param string      $words    Whitespace-separated search terms.
 * @param int         $maxWords Maximum number of context words on each side of a hit.
 * @param string|null $data     URL prefix written into each link's href before "&hit=N".
 *
 * @return string One '<div dir =rtl>' element per hit, concatenated; '' when nothing matches.
 */
function hit_keys($str, $words, $maxWords, $data)
{
    // Kept static so hit ids stay unique when several documents are rendered on one page.
    // The output buffer is deliberately NOT static: a static buffer made every call
    // return the snippets of all previous calls as well.
    static $count = 1;

    // --- 1. Per-character patterns -------------------------------------------------
    // After a letter: optional diacritics, then any number of underscores, tatweels or
    // zero-width non-joiners, each optionally followed by diacritics.
    $markTail = '(?:[_\x{0640}\x{200C}]\p{Mn}*)*';

    // Letters that should also match their orthographic variants.
    $letterClasses = [
        'ا' => 'اإآأٱ',
        'ه' => 'هة',
        'و' => 'ؤو',
        'ؤ' => 'ؤو',
        'ى' => 'ىئ',
    ];
    foreach (preg_split('//u', 'بتثجحخدذرزسشصضطظعغفقكلمنيء', -1, PREG_SPLIT_NO_EMPTY) as $letter) {
        $letterClasses[$letter] = $letter;
    }

    $charPatterns = [];
    foreach ($letterClasses as $letter => $class) {
        $charPatterns[$letter] = '[' . $class . ']\p{Mn}*' . $markTail;
    }
    // Each digit matches its ASCII, Arabic-Indic (U+066x) and Persian (U+06Fx) form.
    for ($digit = 0; $digit <= 9; $digit++) {
        $charPatterns[(string) $digit] = sprintf('[%d\x{%04X}\x{%04X}]', $digit, 0x0660 + $digit, 0x06F0 + $digit);
    }
    // Kept from the original table: "0" also matches the Arabic and the ASCII question mark.
    $charPatterns['0'] = '[0\x{0660}\x{06F0}\x{061F}?]';

    // --- 2. Query -> regex ---------------------------------------------------------
    $queryWords = preg_split('/\s+/u', trim((string) $words), -1, PREG_SPLIT_NO_EMPTY);
    if ($queryWords === false || $queryWords === []) {
        // An empty pattern would match everywhere.
        return '';
    }

    $wordPatterns = [];
    foreach ($queryWords as $queryWord) {
        $pattern = '';
        foreach (preg_split('//u', $queryWord, -1, PREG_SPLIT_NO_EMPTY) as $char) {
            // Characters outside the table are quoted so query text can never act as regex syntax.
            $pattern .= isset($charPatterns[$char]) ? $charPatterns[$char] : preg_quote($char, '/');
        }
        $wordPatterns[] = $pattern;
    }

    // Between two query words: any run of tags, whitespace, punctuation, symbols,
    // diacritics, tatweel or right-to-left marks.
    $separator = '(?:<[^>]+>|[\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*';
    $cool = implode($separator, $wordPatterns);

    // --- 3. HTML -> text with newlines at tag boundaries ---------------------------
    $str = preg_replace('~<(script|style|title)\b[^>]*>.*?</\1>~is', '', (string) $str);
    if ($str !== null) {
        // A <br> in front of every tag; strip_tags() + clean() then collapse them into newlines.
        $str = preg_replace('/<[^>]*>/', '<br>$0', $str);
    }
    if ($str === null) {
        return '';
    }
    $str = clean(strip_tags($str, '<br>'));

    // --- 4. Split into alternating "plain text" / "hit" segments -------------------
    $segments = preg_split(
        '/\s*(\S*' . $cool . '\S*)\s*/u',
        $str,
        -1,
        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
    );
    if ($segments === false) {
        return '';
    }

    $all_words = []; // position => whitespace-normalised segment text
    $hits = [];      // position => raw text of a segment that contains a match
    $position = 0;
    foreach ($segments as $segment) {
        if (trim($segment) === '') {
            continue;
        }
        $position++;
        if (preg_match('/' . $cool . '/u', $segment)) {
            $hits[$position] = $segment;
        }
        $all_words[$position] = trim(preg_replace('/\s+/u', ' ', $segment));
    }

    // --- 5. Render one snippet per hit ---------------------------------------------
    // NOTE(review): $data and the document text are written into the markup as-is;
    // confirm that callers pass HTML-safe values.
    $output = '';
    foreach ($hits as $position => $hitText) {
        $hitword = '';
        $hasSpace = strpos(trim($hitText), ' ') !== false;

        foreach (preg_split('/\r\n|\r|\n/', $hitText) as $line) {
            if (trim(preg_replace('/\s/u', '', $line)) === '') {
                continue;
            }

            $open = "<a onclick = 'go_hit(this);' id =hit_" . $count . " href = " . $data . "&hit=" . $count . ">";
            if ($hasSpace) {
                // Multi-word hit: link the whole line.
                $hitword .= $open . $line . "</a> ";
            } else {
                // Single token: link only the matched part. A callback is used so that
                // "$" or "\" inside $data cannot be read as a replacement reference.
                $hitword .= preg_replace_callback(
                    '/' . $cool . '/us',
                    function ($match) use ($open) {
                        return $open . $match[0] . "</a> ";
                    },
                    $line
                );
            }
            $count++;
        }

        // Reset per hit so context never leaks over from the previous hit.
        $back = isset($all_words[$position - 1]) ? $all_words[$position - 1] : '';
        $next = isset($all_words[$position + 1]) ? $all_words[$position + 1] : '';

        $back_text = back_text($back, $all_words, $position - 2, $maxWords);
        $next_text = next_text($next, $all_words, $position + 2, $maxWords);

        $output .= "<div dir =rtl>" . $back_text . " " . $hitword . " " . $next_text . "</div>";
    }

    return $output;
}

// Minimal usage example: search for "test" and print the highlighted snippet.
$data = null;
$words = "test"; // was misspelled "$wrords", which left $words undefined at the call below
$str = "hi this test of the snippet code";
$maxWords = 7;

echo htmlspecialchars_decode(hit_keys($str, $words, $maxWords, $data));


It runs fast, but when I wrote the same functionality in Java for Android and tested it, it was very slow:



public String getsnipit(String myString,String rhhlterm,String value){


String output = "";
String body;
try {
InputStream is = getAssets().open(myString);

/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/

body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));

//body = removeTag(body);

// body = striphtmlbutbr(body);
body = clean(body);



String words = rhhlterm;

words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");

String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);

String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";

int tot_countS = 0;
int countK = 0;

Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;

List<Cube> myCubes = new ArrayList<>();

for (int i = 0; i < list2.length; i++) {

if(isNullOrEmpty(list2[i].trim())==false){

Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {

// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;

if(countK >=5) {
break;
}
}


//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;



}



}


// Log.d("ggg",gfg1);

Integer count = 1;

Integer count2 = 0;



String back= null;
String next= null;

String next_text =null;
String back_text =null;

Integer maxWords = 7;


String hit = "";


for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");



for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
if(gg[ii].replaceAll("\s","")!= ""){



if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));


}


count = count+1;;
}

}

// Log.d(" dd", dd);
hit = dd;

if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}


if(all_words.get(myCubes.get(i).index+1) != null){

next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}



// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}

} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}


output = output.replaceAll("\s+"," ");

return output;

}


I also tried C++, and it was very slow too. So my question is: will writing this part in native code and calling it from Java on Android improve performance, or must I write the whole project in a lower-level language like C?









share















migration rejected from superuser.com yesterday


This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.





put on hold as unclear what you're asking by 200_success, Jamal yesterday


Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.















  • ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
    – jsem
    yesterday










  • i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
    – jsem
    yesterday












  • when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
    – jsem
    yesterday










  • Wait, you didn't write this code? How did you come by it?
    – bruglesco
    yesterday








  • 4




    This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
    – mdfst13
    yesterday
















-1














I wrote PHP code that extracts text snippets around matched search keywords for use with my search engine. Here is the code:



/**
 * Builds the context shown AFTER a hit.
 *
 * Starting from $AppendText, keeps appending entries of $all_words (from index
 * $row_next upwards) until the text holds at least $maxWords words or the list
 * runs out, then keeps only the first $maxWords whitespace-separated tokens and
 * appends "..".
 *
 * @param string|null $AppendText Text that directly follows the hit.
 * @param array       $all_words  Text segments keyed by consecutive integer position.
 * @param int         $row_next   Position of the next segment that may be appended.
 * @param int         $maxWords   Maximum number of tokens to keep.
 *
 * @return string The truncated context followed by "..", or just ".." when there is no context.
 */
function next_text($AppendText, $all_words, $row_next, $maxWords)
{
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;

    // "{1,0}" is not a valid quantifier, so there is nothing to extract.
    if ($maxWords < 1) {
        return '..';
    }

    // Iterative form of the original tail recursion.
    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' ' . $all_words[$row_next];
        $row_next++;
    }

    // ltrim: an empty start text plus ' word' would otherwise defeat the ^\S+ anchor.
    // No "u" flag is needed: whitespace bytes never occur inside a UTF-8 sequence.
    if (preg_match('/^(?:\S+\s*){1,' . $maxWords . '}/', ltrim($AppendText), $match) !== 1) {
        return '..';
    }

    return $match[0] . '..';
}


/**
 * Builds the context shown BEFORE a hit.
 *
 * Starting from $AppendText, keeps prepending entries of $all_words (from index
 * $row_back downwards) until the text holds at least $maxWords words or the
 * list runs out, then keeps only the last $maxWords whitespace-separated tokens
 * and prefixes them with "..".
 *
 * @param string|null $AppendText Text that directly precedes the hit.
 * @param array       $all_words  Text segments keyed by consecutive integer position.
 * @param int         $row_back   Position of the next segment that may be prepended.
 * @param int         $maxWords   Maximum number of tokens to keep.
 *
 * @return string ".." followed by the truncated context, or just ".." when there is no context.
 */
function back_text($AppendText, $all_words, $row_back, $maxWords)
{
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;

    // "{1,0}" is not a valid quantifier, so there is nothing to extract.
    if ($maxWords < 1) {
        return '..';
    }

    // Iterative form of the original tail recursion.
    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back] . ' ' . $AppendText;
        $row_back--;
    }

    // The atomic group stops the engine from backtracking into a token while it
    // looks for the start of the last $maxWords tokens.
    if (preg_match('/(?>\S+\s*){1,' . $maxWords . '}$/', $AppendText, $match) !== 1) {
        return '..';
    }

    return '..' . $match[0];
}

if (!function_exists('mb_str_word_count')) {
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a run of letters, combining marks, digits and apostrophes.
     * Combining marks are included so that vocalised Arabic words are not cut
     * at every diacritic.
     *
     * @param string $string   Text to analyse.
     * @param int    $format   0 returns the number of words; any other value returns the list of words.
     * @param string $charlist Unused; kept for signature compatibility with str_word_count().
     *
     * @return int|array Word count for format 0, otherwise the list of words.
     */
    function mb_str_word_count($string, $format = 0, $charlist = '')
    {
        $string = trim((string) $string);

        // Compare against '' explicitly: empty("0") is true and would drop the word "0".
        if ($string === '') {
            $words = [];
        } else {
            // NO_EMPTY: leading or trailing punctuation must not yield an empty "word".
            $words = preg_split('~[^\p{L}\p{M}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Invalid UTF-8 or a PCRE failure: report no words rather than count(false).
                $words = [];
            }
        }

        if ((int) $format === 0) {
            return count($words);
        }

        return $words;
    }
}

/**
 * Replaces every run of <br> tags (any case, optional "/", optional whitespace
 * between and after them) with a single newline.
 *
 * @param string $txt Text that may contain <br> tags.
 *
 * @return string The text with line-break tags turned into "\n".
 */
function clean($txt)
{
    // A single "+" pattern covers both the "two or more" and the "single tag" case.
    $cleaned = preg_replace('{(?:<br\s*/?>\s*)+}i', "\n", (string) $txt);

    // preg_replace() returns null on a PCRE error; fall back to the input unchanged.
    return $cleaned === null ? (string) $txt : $cleaned;
}



/**
 * Builds "keyword in context" HTML snippets for every occurrence of a search
 * query inside an HTML document.
 *
 * The query is turned into a tolerant regular expression (Arabic letter
 * variants, optional diacritics and tatweel, Western/Arabic/Persian digits),
 * the document is reduced to plain text with newlines at tag boundaries, and
 * each match is wrapped in a link and surrounded by up to $maxWords words of
 * context on each side.
 *
 * @param string      $str      HTML (or plain text) to search.
 * @param string      $words    Whitespace-separated search terms.
 * @param int         $maxWords Maximum number of context words on each side of a hit.
 * @param string|null $data     URL prefix written into each link's href before "&hit=N".
 *
 * @return string One '<div dir =rtl>' element per hit, concatenated; '' when nothing matches.
 */
function hit_keys($str, $words, $maxWords, $data)
{
    // Kept static so hit ids stay unique when several documents are rendered on one page.
    // The output buffer is deliberately NOT static: a static buffer made every call
    // return the snippets of all previous calls as well.
    static $count = 1;

    // --- 1. Per-character patterns -------------------------------------------------
    // After a letter: optional diacritics, then any number of underscores, tatweels or
    // zero-width non-joiners, each optionally followed by diacritics.
    $markTail = '(?:[_\x{0640}\x{200C}]\p{Mn}*)*';

    // Letters that should also match their orthographic variants.
    $letterClasses = [
        'ا' => 'اإآأٱ',
        'ه' => 'هة',
        'و' => 'ؤو',
        'ؤ' => 'ؤو',
        'ى' => 'ىئ',
    ];
    foreach (preg_split('//u', 'بتثجحخدذرزسشصضطظعغفقكلمنيء', -1, PREG_SPLIT_NO_EMPTY) as $letter) {
        $letterClasses[$letter] = $letter;
    }

    $charPatterns = [];
    foreach ($letterClasses as $letter => $class) {
        $charPatterns[$letter] = '[' . $class . ']\p{Mn}*' . $markTail;
    }
    // Each digit matches its ASCII, Arabic-Indic (U+066x) and Persian (U+06Fx) form.
    for ($digit = 0; $digit <= 9; $digit++) {
        $charPatterns[(string) $digit] = sprintf('[%d\x{%04X}\x{%04X}]', $digit, 0x0660 + $digit, 0x06F0 + $digit);
    }
    // Kept from the original table: "0" also matches the Arabic and the ASCII question mark.
    $charPatterns['0'] = '[0\x{0660}\x{06F0}\x{061F}?]';

    // --- 2. Query -> regex ---------------------------------------------------------
    $queryWords = preg_split('/\s+/u', trim((string) $words), -1, PREG_SPLIT_NO_EMPTY);
    if ($queryWords === false || $queryWords === []) {
        // An empty pattern would match everywhere.
        return '';
    }

    $wordPatterns = [];
    foreach ($queryWords as $queryWord) {
        $pattern = '';
        foreach (preg_split('//u', $queryWord, -1, PREG_SPLIT_NO_EMPTY) as $char) {
            // Characters outside the table are quoted so query text can never act as regex syntax.
            $pattern .= isset($charPatterns[$char]) ? $charPatterns[$char] : preg_quote($char, '/');
        }
        $wordPatterns[] = $pattern;
    }

    // Between two query words: any run of tags, whitespace, punctuation, symbols,
    // diacritics, tatweel or right-to-left marks.
    $separator = '(?:<[^>]+>|[\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*';
    $cool = implode($separator, $wordPatterns);

    // --- 3. HTML -> text with newlines at tag boundaries ---------------------------
    $str = preg_replace('~<(script|style|title)\b[^>]*>.*?</\1>~is', '', (string) $str);
    if ($str !== null) {
        // A <br> in front of every tag; strip_tags() + clean() then collapse them into newlines.
        $str = preg_replace('/<[^>]*>/', '<br>$0', $str);
    }
    if ($str === null) {
        return '';
    }
    $str = clean(strip_tags($str, '<br>'));

    // --- 4. Split into alternating "plain text" / "hit" segments -------------------
    $segments = preg_split(
        '/\s*(\S*' . $cool . '\S*)\s*/u',
        $str,
        -1,
        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
    );
    if ($segments === false) {
        return '';
    }

    $all_words = []; // position => whitespace-normalised segment text
    $hits = [];      // position => raw text of a segment that contains a match
    $position = 0;
    foreach ($segments as $segment) {
        if (trim($segment) === '') {
            continue;
        }
        $position++;
        if (preg_match('/' . $cool . '/u', $segment)) {
            $hits[$position] = $segment;
        }
        $all_words[$position] = trim(preg_replace('/\s+/u', ' ', $segment));
    }

    // --- 5. Render one snippet per hit ---------------------------------------------
    // NOTE(review): $data and the document text are written into the markup as-is;
    // confirm that callers pass HTML-safe values.
    $output = '';
    foreach ($hits as $position => $hitText) {
        $hitword = '';
        $hasSpace = strpos(trim($hitText), ' ') !== false;

        foreach (preg_split('/\r\n|\r|\n/', $hitText) as $line) {
            if (trim(preg_replace('/\s/u', '', $line)) === '') {
                continue;
            }

            $open = "<a onclick = 'go_hit(this);' id =hit_" . $count . " href = " . $data . "&hit=" . $count . ">";
            if ($hasSpace) {
                // Multi-word hit: link the whole line.
                $hitword .= $open . $line . "</a> ";
            } else {
                // Single token: link only the matched part. A callback is used so that
                // "$" or "\" inside $data cannot be read as a replacement reference.
                $hitword .= preg_replace_callback(
                    '/' . $cool . '/us',
                    function ($match) use ($open) {
                        return $open . $match[0] . "</a> ";
                    },
                    $line
                );
            }
            $count++;
        }

        // Reset per hit so context never leaks over from the previous hit.
        $back = isset($all_words[$position - 1]) ? $all_words[$position - 1] : '';
        $next = isset($all_words[$position + 1]) ? $all_words[$position + 1] : '';

        $back_text = back_text($back, $all_words, $position - 2, $maxWords);
        $next_text = next_text($next, $all_words, $position + 2, $maxWords);

        $output .= "<div dir =rtl>" . $back_text . " " . $hitword . " " . $next_text . "</div>";
    }

    return $output;
}

// Minimal usage example: search for "test" and print the highlighted snippet.
$data = null;
$words = "test"; // was misspelled "$wrords", which left $words undefined at the call below
$str = "hi this test of the snippet code";
$maxWords = 7;

echo htmlspecialchars_decode(hit_keys($str, $words, $maxWords, $data));


It runs fast, but when I wrote the same functionality in Java for Android and tested it, it was very slow:



public String getsnipit(String myString,String rhhlterm,String value){


String output = "";
String body;
try {
InputStream is = getAssets().open(myString);

/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/

body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));

//body = removeTag(body);

// body = striphtmlbutbr(body);
body = clean(body);



String words = rhhlterm;

words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");

String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);

String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";

int tot_countS = 0;
int countK = 0;

Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;

List<Cube> myCubes = new ArrayList<>();

for (int i = 0; i < list2.length; i++) {

if(isNullOrEmpty(list2[i].trim())==false){

Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {

// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;

if(countK >=5) {
break;
}
}


//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;



}



}


// Log.d("ggg",gfg1);

Integer count = 1;

Integer count2 = 0;



String back= null;
String next= null;

String next_text =null;
String back_text =null;

Integer maxWords = 7;


String hit = "";


for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");



for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
if(gg[ii].replaceAll("\s","")!= ""){



if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));


}


count = count+1;;
}

}

// Log.d(" dd", dd);
hit = dd;

if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}


if(all_words.get(myCubes.get(i).index+1) != null){

next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}



// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}

} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}


output = output.replaceAll("\s+"," ");

return output;

}


I also tried C++, and it was very slow too. So my question is: will writing this part in native code and calling it from Java on Android improve performance, or must I write the whole project in a lower-level language like C?









share















migration rejected from superuser.com yesterday


This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.





put on hold as unclear what you're asking by 200_success, Jamal yesterday


Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.















  • ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
    – jsem
    yesterday










  • i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
    – jsem
    yesterday












  • when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
    – jsem
    yesterday










  • Wait, you didn't write this code? How did you come by it?
    – bruglesco
    yesterday








  • 4




    This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
    – mdfst13
    yesterday














-1












-1








-1







I wrote PHP code that extracts text snippets around matched search keywords for use with my search engine. Here is the code:



/**
 * Builds the context shown AFTER a hit.
 *
 * Starting from $AppendText, keeps appending entries of $all_words (from index
 * $row_next upwards) until the text holds at least $maxWords words or the list
 * runs out, then keeps only the first $maxWords whitespace-separated tokens and
 * appends "..".
 *
 * @param string|null $AppendText Text that directly follows the hit.
 * @param array       $all_words  Text segments keyed by consecutive integer position.
 * @param int         $row_next   Position of the next segment that may be appended.
 * @param int         $maxWords   Maximum number of tokens to keep.
 *
 * @return string The truncated context followed by "..", or just ".." when there is no context.
 */
function next_text($AppendText, $all_words, $row_next, $maxWords)
{
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;

    // "{1,0}" is not a valid quantifier, so there is nothing to extract.
    if ($maxWords < 1) {
        return '..';
    }

    // Iterative form of the original tail recursion.
    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' ' . $all_words[$row_next];
        $row_next++;
    }

    // ltrim: an empty start text plus ' word' would otherwise defeat the ^\S+ anchor.
    // No "u" flag is needed: whitespace bytes never occur inside a UTF-8 sequence.
    if (preg_match('/^(?:\S+\s*){1,' . $maxWords . '}/', ltrim($AppendText), $match) !== 1) {
        return '..';
    }

    return $match[0] . '..';
}


/**
 * Builds the context shown BEFORE a hit.
 *
 * Starting from $AppendText, keeps prepending entries of $all_words (from index
 * $row_back downwards) until the text holds at least $maxWords words or the
 * list runs out, then keeps only the last $maxWords whitespace-separated tokens
 * and prefixes them with "..".
 *
 * @param string|null $AppendText Text that directly precedes the hit.
 * @param array       $all_words  Text segments keyed by consecutive integer position.
 * @param int         $row_back   Position of the next segment that may be prepended.
 * @param int         $maxWords   Maximum number of tokens to keep.
 *
 * @return string ".." followed by the truncated context, or just ".." when there is no context.
 */
function back_text($AppendText, $all_words, $row_back, $maxWords)
{
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;

    // "{1,0}" is not a valid quantifier, so there is nothing to extract.
    if ($maxWords < 1) {
        return '..';
    }

    // Iterative form of the original tail recursion.
    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back] . ' ' . $AppendText;
        $row_back--;
    }

    // The atomic group stops the engine from backtracking into a token while it
    // looks for the start of the last $maxWords tokens.
    if (preg_match('/(?>\S+\s*){1,' . $maxWords . '}$/', $AppendText, $match) !== 1) {
        return '..';
    }

    return '..' . $match[0];
}

if (!function_exists('mb_str_word_count')) {
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a run of letters, combining marks, digits and apostrophes.
     * Combining marks are included so that vocalised Arabic words are not cut
     * at every diacritic.
     *
     * @param string $string   Text to analyse.
     * @param int    $format   0 returns the number of words; any other value returns the list of words.
     * @param string $charlist Unused; kept for signature compatibility with str_word_count().
     *
     * @return int|array Word count for format 0, otherwise the list of words.
     */
    function mb_str_word_count($string, $format = 0, $charlist = '')
    {
        $string = trim((string) $string);

        // Compare against '' explicitly: empty("0") is true and would drop the word "0".
        if ($string === '') {
            $words = [];
        } else {
            // NO_EMPTY: leading or trailing punctuation must not yield an empty "word".
            $words = preg_split('~[^\p{L}\p{M}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Invalid UTF-8 or a PCRE failure: report no words rather than count(false).
                $words = [];
            }
        }

        if ((int) $format === 0) {
            return count($words);
        }

        return $words;
    }
}

/**
 * Replaces every run of <br> tags (any case, optional "/", optional whitespace
 * between and after them) with a single newline.
 *
 * @param string $txt Text that may contain <br> tags.
 *
 * @return string The text with line-break tags turned into "\n".
 */
function clean($txt)
{
    // A single "+" pattern covers both the "two or more" and the "single tag" case.
    $cleaned = preg_replace('{(?:<br\s*/?>\s*)+}i', "\n", (string) $txt);

    // preg_replace() returns null on a PCRE error; fall back to the input unchanged.
    return $cleaned === null ? (string) $txt : $cleaned;
}



function hit_keys($str,$words,$maxWords,$data){
$row_next="";
$row_back="";
$back= null;
$next= null;
$index =null;
$next_text =null;
$back_text =null;


$keywords = array();
$all_words = array();

$countK = 1;
$countS = 1;
$tot_countS = 1;


static $count = 1;
static $output = "";
static $dd = "";
static $hitword = "";
end($all_words);
$lastword_index = key($all_words);
$currKey_index ="";


$corr = [
'ا' => '(?:[اإآأٱ]\p{Mn}+|[اإآأٱ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ب' => '(?:[ب]\p{Mn}+|[ب])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ت' => '(?:[ت]\p{Mn}+|[ت])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ث' => '(?:[ث]\p{Mn}+|[ث])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ج' => '(?:[ج]\p{Mn}+|[ج])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ح' => '(?:[ح]\p{Mn}+|[ح])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'خ' => '(?:[خ]\p{Mn}+|[خ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'د' => '(?:[د]\p{Mn}+|[د])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ذ' => '(?:[ذ]\p{Mn}+|[ذ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ر' => '(?:[ر]\p{Mn}+|[ر])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ز' => '(?:[ز]\p{Mn}+|[ز])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'س' => '(?:[س]\p{Mn}+|[س])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ش' => '(?:[ش]\p{Mn}+|[ش])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ص' => '(?:[ص]\p{Mn}+|[ص])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ض' => '(?:[ض]\p{Mn}+|[ض])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ط' => '(?:[ط]\p{Mn}+|[ط])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ظ' => '(?:[ظ]\p{Mn}+|[ظ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ع' => '(?:[ع]\p{Mn}+|[ع])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'غ' => '(?:[غ]\p{Mn}+|[غ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ف' => '(?:[ف]\p{Mn}+|[ف])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ق' => '(?:[ق]\p{Mn}+|[ق])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ك' => '(?:[ك]\p{Mn}+|[ك])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ل' => '(?:[ل]\p{Mn}+|[ل])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'م' => '(?:[م]\p{Mn}+|[م])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ن' => '(?:[ن]\p{Mn}+|[ن])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ه' => '(?:[ه|ة]\p{Mn}+|[ه|ة])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}‌])*'
,"و" =>'(?:[ؤو]\p{Mn}+|[ؤو])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ؤ" =>'(?:[ؤو]\p{Mn}+|[ؤو])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ى" =>'(?:[ىئ]\p{Mn}+|[ىئ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ي" =>'(?:[ي]\p{Mn}+|[ي])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ء" =>'(?:[ء]\p{Mn}+|[ء])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
//٠
,"0"=>'(?:0|\x{0660}|\؟|\?)'
,"1"=>'(?:1|۱|١)'
,"2"=>'(?:2|۲|٢)'
,"3"=>'(?:3|۳|٣)'
,"4"=>'(?:4|٤|٤)'
,"5"=>'(?:5|٥|٥)'
,"6"=>'(?:6|٦|٦)'
,"7"=>'(?:7|٧|٧)'
,"8"=>'(?:8|۸|٨)'
,"9"=>'(?:9|۹|٩)'

];

$str = preg_replace('/(<script[^>]*>.+?</script>|<style[^>]*>.+?</style>|<title[^>]*>.+?</title>)/s', '', $str);
$str = preg_replace('/((.*?)</(.*?)>)(.*?)(<(.*?)[^>]*>(.*?))/is', "$1<br>$4$5", $str);
$str = preg_replace('/(.*?)(<(.*?)[^>]*>(.*?))/is', "$1<br>$2", $str);
$str= strip_tags($str,'<br>');
$str = clean($str);

$cool = implode(preg_split('/\s/', strtr($words, $corr)), '(?:<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*');
$result = preg_split("/s*([^s]*".$cool."[^s]*)s*/u", $str, -1, PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE);



for ($i = 0; $i < count($result); $i++) {

if (trim($result[$i]) != '' || trim($result[$i]) != null)
{

if(preg_match("/(".$cool.")/u", $result[$i])){

$keywords[$countK]= array_fill_keys([$tot_countS],$result[$i]);
$countK = $countK+1;
}


$text = trim(preg_replace('/s+/u', ' ', $result[$i]));
$all_words[$tot_countS] = $text;
$countS = $countS+1;

$tot_countS = $tot_countS+1;
}

}

foreach($keywords as $index => $item) {
$dd ="" ;
$gg = preg_split('/rn|r|n/', $item[key($item)]);

for ($i = 0; $i < count($gg); $i++) {
$ff = trim(preg_replace('/s/u', '', $gg[$i]));

if($ff!= ''){



if(strpos(trim($item[key($item)]), ' ') !== false)
{
$dd .= "<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">".$gg[$i]."</a> ";
}
else
{
$dd .= preg_replace('/('.$cool.')/us',"<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">$1</a> ",$gg[$i]);
}


$count++;
}
}

$hitword =$dd;




$currKey_index = key($item);


if(isset($all_words[key($item)-1])){
$back= $all_words[key($item)-1];
}

if(isset($all_words[key($item)+1])){
$next= $all_words[key($item)+1];
}
else{
$next ="";
}

$back_text = back_text($back,$all_words,(key($item)-1)-1,$maxWords);
$next_text = next_text($next,$all_words,(key($item)+1)+1,$maxWords);



$output .= "<div dir =rtl>".$back_text." ".$hitword ." ".$next_text."</div>";
}


return $output;

}

// Demo driver: runs hit_keys() on a tiny sample and prints the snippet markup.
$data = null;                                  // link prefix for the generated hrefs (none in this demo)
$words = "test";                               // search term (was misspelled "$wrords", leaving $words undefined)
$str = "hi this test of the snippet code";     // text to search
$maxWords = 7;                                 // context words to keep on each side of a hit

echo htmlspecialchars_decode(hit_keys($str,$words,$maxWords,$data));


It runs fast, but when I wrote the same functionality in Java for Android and tested it, it was very slow:



public String getsnipit(String myString,String rhhlterm,String value){


String output = "";
String body;
try {
InputStream is = getAssets().open(myString);

/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/

body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));

//body = removeTag(body);

// body = striphtmlbutbr(body);
body = clean(body);



String words = rhhlterm;

words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");

String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);

String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";

int tot_countS = 0;
int countK = 0;

Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;

List<Cube> myCubes = new ArrayList<>();

for (int i = 0; i < list2.length; i++) {

if(isNullOrEmpty(list2[i].trim())==false){

Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {

// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;

if(countK >=5) {
break;
}
}


//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;



}



}


// Log.d("ggg",gfg1);

Integer count = 1;

Integer count2 = 0;



String back= null;
String next= null;

String next_text =null;
String back_text =null;

Integer maxWords = 7;


String hit = "";


for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");



for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
if(gg[ii].replaceAll("\s","")!= ""){



if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));


}


count = count+1;;
}

}

// Log.d(" dd", dd);
hit = dd;

if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}


if(all_words.get(myCubes.get(i).index+1) != null){

next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}



// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}

} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}


output = output.replaceAll("\s+"," ");

return output;

}


I also tried C++, and it was very slow too. So my question is: will writing this part in native code and calling it from Java on Android improve performance, or must I write the whole project in a low-level language like C?









share















I wrote text-snippet code in PHP that extracts the search keywords, with surrounding context, for use with my search engine. Here is the code:



/**
 * Build the context that FOLLOWS a hit.
 *
 * Starting from $AppendText, segments from $all_words (beginning at index
 * $row_next) are appended until the text holds at least $maxWords words or the
 * segments run out. The result is then cut to its first $maxWords
 * whitespace-separated tokens and suffixed with "..".
 *
 * Fixes relative to the original:
 * - The trimming regex was written as S+s* (literal letters) instead of \S+\s*,
 *   so it almost never matched and $match[0] was read while undefined.
 * - Recursion is replaced by a loop, so there is no stack growth on long documents.
 * - A null start text is treated as an empty string.
 *
 * @param string|null $AppendText text to start from (the segment right after the hit)
 * @param array       $all_words  segment index => segment text
 * @param int         $row_next   index of the next segment to append
 * @param int         $maxWords   word budget; values below 1 are treated as 1
 * @return string trimmed context followed by "..", or just ".." when nothing matches
 */
function next_text($AppendText, $all_words, $row_next, $maxWords)
{
    $AppendText = (string) $AppendText;

    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' ' . $all_words[$row_next];
        $row_next++;
    }

    // {1,0} would be an invalid quantifier, so clamp the limit.
    $limit = max(1, (int) $maxWords);
    if (preg_match('/^(?:\S+\s*){1,' . $limit . '}/u', $AppendText, $match) === 1) {
        return $match[0] . '..';
    }

    return '..';
}


/**
 * Build the context that PRECEDES a hit.
 *
 * Starting from $AppendText, segments from $all_words (walking backwards from
 * index $row_back) are prepended until the text holds at least $maxWords words
 * or the segments run out. The result is then cut to its last $maxWords
 * whitespace-separated tokens and prefixed with "..".
 *
 * Fixes relative to the original:
 * - The trimming regex was written as S+s* (literal letters) instead of \S+\s*,
 *   so it almost never matched and $match[0] was read while undefined.
 * - Recursion is replaced by a loop.
 * - A null start text is treated as an empty string.
 *
 * @param string|null $AppendText text to start from (the segment right before the hit)
 * @param array       $all_words  segment index => segment text
 * @param int         $row_back   index of the next segment to prepend
 * @param int         $maxWords   word budget; values below 1 are treated as 1
 * @return string ".." followed by the trimmed context, or just ".." when nothing matches
 */
function back_text($AppendText, $all_words, $row_back, $maxWords)
{
    $AppendText = (string) $AppendText;

    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back] . ' ' . $AppendText;
        $row_back--;
    }

    // {1,0} would be an invalid quantifier, so clamp the limit.
    $limit = max(1, (int) $maxWords);
    // (?<!\S) makes the engine try only token starts, not every offset inside a word.
    if (preg_match('/(?<!\S)(?>\S+\s*){1,' . $limit . '}$/u', $AppendText, $match) === 1) {
        return '..' . $match[0];
    }

    return '..';
}

if (!function_exists('mb_str_word_count'))
{
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a run of letters, digits and apostrophes; everything else separates words.
     *
     * Fixes relative to the original:
     * - The pattern literal contained an unescaped apostrophe, which is a parse error,
     *   and p{L}/p{N} had lost their backslashes.
     * - empty() treated the string "0" as having no words.
     * - Leading punctuation produced an empty token that was counted as a word.
     * - A failed split (invalid UTF-8) no longer reaches count(false).
     *
     * @param string $string   text to analyse
     * @param int    $format   0 returns the word count; any other value returns the word list
     * @param string $charlist unused; kept for signature compatibility with str_word_count()
     * @return int|array
     */
    function mb_str_word_count($string, $format = 0, $charlist = '')
    {
        $string = trim((string) $string);

        if ($string === '') {
            $words = array();
        } else {
            $words = preg_split('~[^\p{L}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Input is not valid UTF-8: fall back to plain whitespace splitting.
                $words = preg_split('/\s+/', $string, -1, PREG_SPLIT_NO_EMPTY);
            }
        }

        // Loose comparison on purpose: it mirrors the original switch ("0" and null behave like 0).
        if ($format == 0) {
            return count($words);
        }

        return $words;
    }
}

/**
 * Replace every run of <br> tags (any case, optional "/", surrounding whitespace)
 * with a single newline.
 *
 * Fixes relative to the original:
 * - The patterns used s* (literal "s") instead of \s*, so text starting with "s"
 *   right after a <br> was eaten.
 * - The replacement was the letter "n" instead of a newline.
 * The two passes (runs of two or more, then single tags) are merged into one,
 * which gives the same result.
 *
 * @param string $txt text that may contain <br> tags
 * @return string|null text with <br> runs turned into "\n"; null if PCRE fails
 */
function clean($txt)
{
    return preg_replace('~(?:<br\s*/?>\s*)+~i', "\n", $txt);
}



/**
 * Build highlighted search-result snippets for every occurrence of $words in $str.
 *
 * Steps:
 *  1. Turn the search term into a regex that tolerates Arabic letter variants,
 *     combining marks, tatweel/ZWNJ and Arabic-Indic/Persian digits.
 *  2. Reduce the HTML to text in which every tag boundary is a newline.
 *  3. Split the text into alternating "plain text" / "hit" segments.
 *  4. For each hit, emit <div dir =rtl>..before <a ...>hit</a> after..</div>.
 *
 * Fixes relative to the original:
 * - $output was static, so every call returned the previous calls' output as well.
 * - Several regexes had lost their backslashes (s*, [^s]*, rn|r|n), and "</script>"
 *   sat inside "/" delimiters, which ends the pattern early.
 * - implode() used the legacy (array, separator) order, rejected by PHP 8.
 * - Characters of the search term without a table entry are now preg_quote()d.
 * - [ه|ة] matched a literal "|", and the word separator class [<br>...] matched
 *   the letters "b" and "r".
 * - Digits 4-7 listed the Arabic-Indic form twice; all digits now accept the
 *   Arabic-Indic and Persian forms.
 * - $back kept the previous hit's value when a hit had no preceding segment.
 * - The two tag-to-<br> passes are replaced by one linear pass with the same effect.
 * - The anchor is inserted through a callback, so "$" or "\" in $data cannot be
 *   misread as a backreference.
 * - Unused locals are removed.
 *
 * NOTE(review): $count stays static as in the original, so hit ids keep increasing
 * across calls — presumably to keep ids unique on a page; confirm.
 * NOTE(review): $data and the matched text are written into the markup unescaped,
 * exactly as before; callers must pass trusted values.
 *
 * @param string      $str      HTML or plain text to search
 * @param string      $words    search term (whitespace-separated words)
 * @param int         $maxWords context words to keep on each side of a hit
 * @param string|null $data     prefix for the generated href attributes
 * @return string snippet markup ("" when the term is blank or nothing can be matched)
 */
function hit_keys($str, $words, $maxWords, $data)
{
    static $count = 1;

    $terms = preg_split('/\s+/u', trim((string) $words), -1, PREG_SPLIT_NO_EMPTY);
    if (empty($terms)) {
        return '';
    }

    // After a letter: optional combining marks, then any run of underscore / tatweel / ZWNJ with marks.
    $marks = '\p{Mn}*(?:[_\x{0640}\x{200C}]\p{Mn}*)*';

    // Typed letter => set of letters accepted in the text.
    $classes = [
        'ا' => 'اإآأٱ',
        'ه' => 'هة',
        'و' => 'ؤو',
        'ؤ' => 'ؤو',
        'ى' => 'ىئ',
    ];
    foreach (preg_split('//u', 'بتثجحخدذرزسشصضطظعغفقكلمنيء', -1, PREG_SPLIT_NO_EMPTY) as $letter) {
        $classes[$letter] = $letter;
    }

    $corr = [];
    foreach ($classes as $letter => $class) {
        $corr[$letter] = '[' . $class . ']' . $marks;
    }
    for ($d = 1; $d <= 9; $d++) {
        // ASCII digit, Arabic-Indic digit (U+0660..) and Persian digit (U+06F0..).
        $corr[(string) $d] = sprintf('[%d\x{%04X}\x{%04X}]', $d, 0x0660 + $d, 0x06F0 + $d);
    }
    // The original also let "0" match "?" and the Arabic question mark; preserved as-is.
    $corr['0'] = '(?:0|\x{0660}|\x{06F0}|\x{061F}|\?)';

    $termPatterns = [];
    foreach ($terms as $term) {
        $pattern = '';
        foreach (preg_split('//u', $term, -1, PREG_SPLIT_NO_EMPTY) as $ch) {
            $pattern .= isset($corr[$ch]) ? $corr[$ch] : preg_quote($ch, '/');
        }
        $termPatterns[] = $pattern;
    }
    // Between two words of the term: tags, whitespace, punctuation, symbols, marks.
    $cool = implode('(?:<[^>]+>|[\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*', $termPatterns);

    // --- Reduce the HTML to text with a newline at every tag boundary -------------
    $str = (string) $str;
    $withoutBlocks = preg_replace('~<(script|style|title)\b[^>]*>.*?</\1\s*>~is', '', $str);
    if ($withoutBlocks !== null) {
        $str = $withoutBlocks;
    }
    $withBreaks = preg_replace('~<[^>]*>~', '<br>$0', $str);
    if ($withBreaks !== null) {
        $str = $withBreaks;
    }
    $str = clean(strip_tags($str, '<br>'));

    // --- Split into alternating plain / hit segments ------------------------------
    $result = preg_split(
        '/\s*(\S*' . $cool . '\S*)\s*/u',
        (string) $str,
        -1,
        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
    );
    if ($result === false) {
        return '';
    }

    $keywords = [];   // segment index => raw hit text
    $all_words = [];  // segment index => whitespace-normalised text
    $position = 1;
    foreach ($result as $segment) {
        if (trim($segment) === '') {
            continue;
        }
        if (preg_match('/' . $cool . '/u', $segment)) {
            $keywords[$position] = $segment;
        }
        $all_words[$position] = trim((string) preg_replace('/\s+/u', ' ', $segment));
        $position++;
    }

    // --- Render one <div> per hit ---------------------------------------------------
    $output = '';
    foreach ($keywords as $hitIndex => $hitText) {
        $hitword = '';
        $isPhrase = strpos(trim($hitText), ' ') !== false;

        foreach (preg_split('/\r\n|\r|\n/', $hitText) as $line) {
            if (trim((string) preg_replace('/\s/u', '', $line)) === '') {
                continue;
            }
            $anchor = "<a onclick = 'go_hit(this);' id =hit_" . $count . " href = " . $data . "&hit=" . $count . ">";
            if ($isPhrase) {
                // Multi-word hit: link the whole line.
                $hitword .= $anchor . $line . "</a> ";
            } else {
                // Single token: link only the matching part.
                $hitword .= preg_replace_callback(
                    '/' . $cool . '/us',
                    function ($m) use ($anchor) {
                        return $anchor . $m[0] . "</a> ";
                    },
                    $line
                );
            }
            $count++;
        }

        $back = isset($all_words[$hitIndex - 1]) ? $all_words[$hitIndex - 1] : '';
        $next = isset($all_words[$hitIndex + 1]) ? $all_words[$hitIndex + 1] : '';

        $back_text = back_text($back, $all_words, ($hitIndex - 1) - 1, $maxWords);
        $next_text = next_text($next, $all_words, ($hitIndex + 1) + 1, $maxWords);

        $output .= "<div dir =rtl>" . $back_text . " " . $hitword . " " . $next_text . "</div>";
    }

    return $output;
}

// Demo driver: runs hit_keys() on a tiny sample and prints the snippet markup.
$data = null;                                  // link prefix for the generated hrefs (none in this demo)
$words = "test";                               // search term (was misspelled "$wrords", leaving $words undefined)
$str = "hi this test of the snippet code";     // text to search
$maxWords = 7;                                 // context words to keep on each side of a hit

echo htmlspecialchars_decode(hit_keys($str,$words,$maxWords,$data));


It runs fast, but when I wrote the same functionality in Java for Android and tested it, it was very slow:



public String getsnipit(String myString,String rhhlterm,String value){


String output = "";
String body;
try {
InputStream is = getAssets().open(myString);

/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/

body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));

//body = removeTag(body);

// body = striphtmlbutbr(body);
body = clean(body);



String words = rhhlterm;

words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");

String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);

String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";

int tot_countS = 0;
int countK = 0;

Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;

List<Cube> myCubes = new ArrayList<>();

for (int i = 0; i < list2.length; i++) {

if(isNullOrEmpty(list2[i].trim())==false){

Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {

// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;

if(countK >=5) {
break;
}
}


//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;



}



}


// Log.d("ggg",gfg1);

Integer count = 1;

Integer count2 = 0;



String back= null;
String next= null;

String next_text =null;
String back_text =null;

Integer maxWords = 7;


String hit = "";


for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");



for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
if(gg[ii].replaceAll("\s","")!= ""){



if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));


}


count = count+1;;
}

}

// Log.d(" dd", dd);
hit = dd;

if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}


if(all_words.get(myCubes.get(i).index+1) != null){

next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}



// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}

} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}


output = output.replaceAll("\s+"," ");

return output;

}


I also tried C++, and it was very slow too. So my question is: will writing this part in native code and calling it from Java on Android improve performance, or must I write the whole project in a low-level language like C?







java php





share














share












share



share








edited yesterday









Reinderien

3,842821




3,842821










asked yesterday









jsem

1




1




migration rejected from superuser.com yesterday


This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.





put on hold as unclear what you're asking by 200_success, Jamal yesterday


Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.






migration rejected from superuser.com yesterday


This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.





put on hold as unclear what you're asking by 200_success, Jamal yesterday


Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.














  • ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
    – jsem
    yesterday










  • i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
    – jsem
    yesterday












  • when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
    – jsem
    yesterday










  • Wait, you didn't write this code? How did you come by it?
    – bruglesco
    yesterday








  • 4




    This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
    – mdfst13
    yesterday


















  • ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
    – jsem
    yesterday










  • i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
    – jsem
    yesterday












  • when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
    – jsem
    yesterday










  • Wait, you didn't write this code? How did you come by it?
    – bruglesco
    yesterday








  • 4




    This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
    – mdfst13
    yesterday
















ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
– jsem
yesterday




ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
– jsem
yesterday












i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
– jsem
yesterday






i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
– jsem
yesterday














when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
– jsem
yesterday




when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
– jsem
yesterday












Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday






Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday






4




4




This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday




This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday










0






active

oldest

votes

















0






active

oldest

votes








0






active

oldest

votes









active

oldest

votes






active

oldest

votes

Popular posts from this blog

How to reconfigure Docker Trusted Registry 2.x.x to use CEPH FS mount instead of NFS and other traditional...

is 'sed' thread safe

How to make a Squid Proxy server?