Why are Java and C++ slower than PHP in Android? [on hold]
I created text snippet code in PHP to extract search key words to work with my search engine. Here is the code:
/**
 * Build the context text that follows a hit.
 *
 * Starting from $AppendText, keeps appending the chunks $all_words[$row_next],
 * $all_words[$row_next + 1], ... until the text holds at least $maxWords words
 * (as counted by mb_str_word_count) or the chunks run out. The result is then
 * cut down to its first $maxWords whitespace-separated words and ".." is appended.
 *
 * @param string $AppendText Text immediately after the hit.
 * @param array  $all_words  Text chunks keyed by consecutive integer position.
 * @param int    $row_next   Key of the next chunk to append.
 * @param int    $maxWords   Maximum number of words to keep.
 * @return string The truncated text followed by "..", or just ".." when there is no text.
 */
function next_text($AppendText,$all_words,$row_next,$maxWords){
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;
    if ($maxWords < 1) {
        // "{1,0}" is not a valid quantifier and no word could be kept anyway.
        return '..';
    }
    // Loop instead of recursing: same result, no call-stack growth on long documents.
    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' '.$all_words[$row_next];
        $row_next++;
    }
    // \S+\s* is one word plus its trailing whitespace; "u" keeps multi-byte text intact.
    if (preg_match('/^(?:\S+\s*){1,'.$maxWords.'}/u', $AppendText, $match) === 1) {
        return $match[0].'..';
    }
    // Empty or whitespace-only text: only the ellipsis remains.
    return '..';
}
/**
 * Build the context text that precedes a hit.
 *
 * Starting from $AppendText, keeps prepending the chunks $all_words[$row_back],
 * $all_words[$row_back - 1], ... until the text holds at least $maxWords words
 * (as counted by mb_str_word_count) or the chunks run out. The result is then
 * cut down to its last $maxWords whitespace-separated words and prefixed with "..".
 *
 * @param string $AppendText Text immediately before the hit.
 * @param array  $all_words  Text chunks keyed by consecutive integer position.
 * @param int    $row_back   Key of the next chunk to prepend.
 * @param int    $maxWords   Maximum number of words to keep.
 * @return string ".." followed by the truncated text, or just ".." when there is no text.
 */
function back_text($AppendText,$all_words,$row_back,$maxWords){
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;
    if ($maxWords < 1) {
        // "{1,0}" is not a valid quantifier and no word could be kept anyway.
        return '..';
    }
    // Loop instead of recursing: same result, no call-stack growth on long documents.
    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back].' '.$AppendText;
        $row_back--;
    }
    // The atomic group stops a match from starting in the middle of a word, so
    // the match is always the last (up to) $maxWords whole words.
    if (preg_match('/(?>\S+\s*){1,'.$maxWords.'}$/u', $AppendText, $match) === 1) {
        return '..'.$match[0];
    }
    // Empty or whitespace-only text: only the ellipsis remains.
    return '..';
}
if (!function_exists('mb_str_word_count'))
{
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a run of letters, combining marks, digits and apostrophes;
     * everything else separates words. Combining marks are kept inside words so
     * that vocalised Arabic text is not split at every diacritic.
     *
     * @param string $string   UTF-8 text to examine.
     * @param int    $format   0 returns the number of words; any other value returns the list of words.
     * @param string $charlist Accepted for signature compatibility with str_word_count(); not used.
     * @return int|array Word count for format 0, otherwise the words in order of appearance.
     */
    function mb_str_word_count($string, $format = 0, $charlist = '') {
        $string = trim((string) $string);
        // Compare against '' explicitly: empty("0") is true, but "0" is one word.
        if ($string === '') {
            $words = array();
        } else {
            // NO_EMPTY drops the empty piece that leading/trailing punctuation would leave behind.
            $words = preg_split('~[^\p{L}\p{M}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Invalid UTF-8 or another PCRE failure: treat the text as having no words.
                $words = array();
            }
        }
        // Loose comparison mirrors the switch statement this replaces.
        if ($format == 0) {
            return count($words);
        }
        return $words;
    }
}
/**
 * Turn HTML line breaks into newlines.
 *
 * Every run of one or more <br>, <br/> or <br /> tags (any letter case),
 * together with the whitespace that follows each tag, becomes a single "\n".
 *
 * @param string $txt Text that may contain <br> tags.
 * @return string The text with each run of line-break tags replaced by one newline.
 */
function clean($txt)
{
    // One pass covers both the "two or more" and the "single tag" case, because
    // both are replaced by the same newline.
    return preg_replace('~(?:<br\s*/?>\s*)+~i', "\n", $txt);
}
/**
 * Build highlighted context snippets for every occurrence of a search phrase in an HTML document.
 *
 * The HTML is reduced to plain text in which every tag boundary becomes a line
 * break. The query is expanded into a regex that tolerates Arabic spelling
 * variants, diacritics, tatweel, and punctuation between the query words. Each
 * hit is wrapped in an <a id=hit_N> link and surrounded by the context that
 * back_text() and next_text() build from the neighbouring text.
 *
 * @param string      $str      HTML (or plain text) to search.
 * @param string      $words    Whitespace-separated search terms; treated as literal text, not as a regex.
 * @param int         $maxWords Maximum number of context words on each side of a hit.
 * @param string|null $data     URL prefix placed in front of "&hit=N" in every link.
 * @return string One <div dir =rtl> element per hit, concatenated; '' when nothing matches.
 */
function hit_keys($str,$words,$maxWords,$data){
    // Hit ids keep counting across calls, so links stay unique when several
    // documents are rendered into one page. The output itself is per call.
    static $count = 1;
    $output = '';

    // Optional tatweel / underscore / zero-width non-joiner after a letter,
    // each optionally followed by diacritics.
    $tail = '(?:[_\x{0640}\x{200C}]\p{Mn}+|[_\x{0640}\x{200C}])*';
    // Letters that match several spellings; the remaining letters match only themselves.
    $variants = ['ا' => 'اإآأٱ', 'ه' => 'هة', 'و' => 'ؤو', 'ؤ' => 'ؤو', 'ى' => 'ىئ'];
    $plainLetters = ['ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض',
        'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ي', 'ء'];
    foreach ($plainLetters as $letter) {
        $variants[$letter] = $letter;
    }
    $corr = [];
    foreach ($variants as $letter => $class) {
        $corr[$letter] = '(?:['.$class.']\p{Mn}+|['.$class.'])'.$tail;
    }
    // A Western digit also matches its Arabic-Indic (U+066x) and Persian (U+06Fx) forms.
    for ($digit = 0; $digit <= 9; $digit++) {
        $corr[$digit] = '['.$digit.'\x{066'.$digit.'}\x{06F'.$digit.'}]';
    }
    // NOTE(review): the earlier table also let "0" match "?" and the Arabic
    // question mark; kept for compatibility, but it looks like an encoding
    // artefact. Confirm and remove if so.
    $corr[0] = '(?:'.$corr[0].'|\x{061F}|\?)';

    $terms = preg_split('/\s+/u', trim((string) $words), -1, PREG_SPLIT_NO_EMPTY);
    if (!$terms) {
        // Empty query or invalid UTF-8: there is nothing to look for.
        return '';
    }
    $termPatterns = [];
    foreach ($terms as $term) {
        $pattern = '';
        foreach (preg_split('//u', $term, -1, PREG_SPLIT_NO_EMPTY) as $char) {
            // Characters outside the table are escaped so user input cannot inject regex syntax.
            $pattern .= isset($corr[$char]) ? $corr[$char] : preg_quote($char, '/');
        }
        $termPatterns[] = $pattern;
    }
    // Between two query words allow tags, whitespace, punctuation, symbols,
    // diacritics, tatweel and the right-to-left mark.
    $gap = '(?:<[^>]+>|[\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*';
    $cool = implode($gap, $termPatterns);

    // Drop elements whose content is never visible text.
    $str = (string) $str;
    $withoutHidden = preg_replace('~<(script|style|title)\b[^>]*>.*?</\1\s*>~is', '', $str);
    if ($withoutHidden !== null) {
        $str = $withoutHidden;
    }
    // Put a <br> in front of every tag so that, once the tags are stripped and
    // clean() has collapsed the breaks, each tag boundary is one line break.
    $str = preg_replace('/<[^>]*>/', '<br>$0', $str);
    $str = clean(strip_tags($str, '<br>'));

    // Split into alternating "plain text" / "word(s) containing a hit" chunks.
    $result = preg_split('/\s*(\S*'.$cool.'\S*)\s*/u', $str, -1, PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE);
    if ($result === false) {
        return '';
    }

    $all_words = []; // every non-empty chunk, keyed 1..n in document order
    $hits = [];      // the chunks that contain a hit, under the same keys
    $position = 1;
    foreach ($result as $chunk) {
        if (trim($chunk) === '') {
            continue;
        }
        if (preg_match('/'.$cool.'/u', $chunk)) {
            $hits[$position] = $chunk;
        }
        $all_words[$position] = trim(preg_replace('/\s+/u', ' ', $chunk));
        $position++;
    }

    foreach ($hits as $index => $hit) {
        $hitword = '';
        $isPhrase = strpos(trim($hit), ' ') !== false;
        // A hit may span several lines; each non-blank line gets its own link and id.
        foreach (preg_split('/\r\n|\r|\n/', $hit) as $line) {
            if (trim(preg_replace('/\s/u', '', $line)) === '') {
                continue;
            }
            $open = "<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">";
            if ($isPhrase) {
                $hitword .= $open.$line."</a> ";
            } else {
                // A callback keeps "$" or "\" inside $data from being read as replacement syntax.
                $hitword .= preg_replace_callback(
                    '/'.$cool.'/u',
                    function ($m) use ($open) {
                        return $open.$m[0]."</a> ";
                    },
                    $line
                );
            }
            $count++;
        }

        // Neighbouring chunks seed the context; reset per hit so nothing leaks
        // over from the previous iteration.
        $back = isset($all_words[$index - 1]) ? $all_words[$index - 1] : '';
        $next = isset($all_words[$index + 1]) ? $all_words[$index + 1] : '';
        $back_text = back_text($back, $all_words, $index - 2, $maxWords);
        $next_text = next_text($next, $all_words, $index + 2, $maxWords);
        $output .= "<div dir =rtl>".$back_text." ".$hitword." ".$next_text."</div>";
    }
    return $output;
}
// Demo driver: search a short sample text for "test" and print the snippet.
$data = null;
$words = "test"; // was misspelled "$wrords", which left $words undefined in the call below
$str = "hi this test of the snippet code";
$maxWords = 7;
echo htmlspecialchars_decode(hit_keys($str,$words,$maxWords,$data));
It runs fast in PHP, but when I wrote the same functionality in Java for Android and tested it, it was very slow:
public String getsnipit(String myString,String rhhlterm,String value){
String output = "";
String body;
try {
InputStream is = getAssets().open(myString);
/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/
body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));
//body = removeTag(body);
// body = striphtmlbutbr(body);
body = clean(body);
String words = rhhlterm;
words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");
String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);
String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";
int tot_countS = 0;
int countK = 0;
Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;
List<Cube> myCubes = new ArrayList<>();
for (int i = 0; i < list2.length; i++) {
if(isNullOrEmpty(list2[i].trim())==false){
Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {
// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;
if(countK >=5) {
break;
}
}
//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;
}
}
// Log.d("ggg",gfg1);
Integer count = 1;
Integer count2 = 0;
String back= null;
String next= null;
String next_text =null;
String back_text =null;
Integer maxWords = 7;
String hit = "";
for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");
for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
if(gg[ii].replaceAll("\s","")!= ""){
if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));
}
count = count+1;;
}
}
// Log.d(" dd", dd);
hit = dd;
if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}
if(all_words.get(myCubes.get(i).index+1) != null){
next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}
// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}
} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}
output = output.replaceAll("\s+"," ");
return output;
}
I also tried C++, and it was very slow too. So my question is: will moving this logic into native code and calling it from Java on Android improve the performance, or must I write the whole project in a low-level language like C?
java php
migration rejected from superuser.com yesterday
This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.
put on hold as unclear what you're asking by 200_success, Jamal♦ yesterday
Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.
comments disabled on deleted / locked posts / reviews |
show 1 more comment
I created text snippet code in PHP to extract search key words to work with my search engine. Here is the code:
/**
 * Build the context text that follows a hit.
 *
 * Starting from $AppendText, keeps appending the chunks $all_words[$row_next],
 * $all_words[$row_next + 1], ... until the text holds at least $maxWords words
 * (as counted by mb_str_word_count) or the chunks run out. The result is then
 * cut down to its first $maxWords whitespace-separated words and ".." is appended.
 *
 * @param string $AppendText Text immediately after the hit.
 * @param array  $all_words  Text chunks keyed by consecutive integer position.
 * @param int    $row_next   Key of the next chunk to append.
 * @param int    $maxWords   Maximum number of words to keep.
 * @return string The truncated text followed by "..", or just ".." when there is no text.
 */
function next_text($AppendText,$all_words,$row_next,$maxWords){
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;
    if ($maxWords < 1) {
        // "{1,0}" is not a valid quantifier and no word could be kept anyway.
        return '..';
    }
    // Loop instead of recursing: same result, no call-stack growth on long documents.
    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' '.$all_words[$row_next];
        $row_next++;
    }
    // \S+\s* is one word plus its trailing whitespace; "u" keeps multi-byte text intact.
    if (preg_match('/^(?:\S+\s*){1,'.$maxWords.'}/u', $AppendText, $match) === 1) {
        return $match[0].'..';
    }
    // Empty or whitespace-only text: only the ellipsis remains.
    return '..';
}
/**
 * Build the context text that precedes a hit.
 *
 * Starting from $AppendText, keeps prepending the chunks $all_words[$row_back],
 * $all_words[$row_back - 1], ... until the text holds at least $maxWords words
 * (as counted by mb_str_word_count) or the chunks run out. The result is then
 * cut down to its last $maxWords whitespace-separated words and prefixed with "..".
 *
 * @param string $AppendText Text immediately before the hit.
 * @param array  $all_words  Text chunks keyed by consecutive integer position.
 * @param int    $row_back   Key of the next chunk to prepend.
 * @param int    $maxWords   Maximum number of words to keep.
 * @return string ".." followed by the truncated text, or just ".." when there is no text.
 */
function back_text($AppendText,$all_words,$row_back,$maxWords){
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;
    if ($maxWords < 1) {
        // "{1,0}" is not a valid quantifier and no word could be kept anyway.
        return '..';
    }
    // Loop instead of recursing: same result, no call-stack growth on long documents.
    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back].' '.$AppendText;
        $row_back--;
    }
    // The atomic group stops a match from starting in the middle of a word, so
    // the match is always the last (up to) $maxWords whole words.
    if (preg_match('/(?>\S+\s*){1,'.$maxWords.'}$/u', $AppendText, $match) === 1) {
        return '..'.$match[0];
    }
    // Empty or whitespace-only text: only the ellipsis remains.
    return '..';
}
if (!function_exists('mb_str_word_count'))
{
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a run of letters, combining marks, digits and apostrophes;
     * everything else separates words. Combining marks are kept inside words so
     * that vocalised Arabic text is not split at every diacritic.
     *
     * @param string $string   UTF-8 text to examine.
     * @param int    $format   0 returns the number of words; any other value returns the list of words.
     * @param string $charlist Accepted for signature compatibility with str_word_count(); not used.
     * @return int|array Word count for format 0, otherwise the words in order of appearance.
     */
    function mb_str_word_count($string, $format = 0, $charlist = '') {
        $string = trim((string) $string);
        // Compare against '' explicitly: empty("0") is true, but "0" is one word.
        if ($string === '') {
            $words = array();
        } else {
            // NO_EMPTY drops the empty piece that leading/trailing punctuation would leave behind.
            $words = preg_split('~[^\p{L}\p{M}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Invalid UTF-8 or another PCRE failure: treat the text as having no words.
                $words = array();
            }
        }
        // Loose comparison mirrors the switch statement this replaces.
        if ($format == 0) {
            return count($words);
        }
        return $words;
    }
}
/**
 * Turn HTML line breaks into newlines.
 *
 * Every run of one or more <br>, <br/> or <br /> tags (any letter case),
 * together with the whitespace that follows each tag, becomes a single "\n".
 *
 * @param string $txt Text that may contain <br> tags.
 * @return string The text with each run of line-break tags replaced by one newline.
 */
function clean($txt)
{
    // One pass covers both the "two or more" and the "single tag" case, because
    // both are replaced by the same newline.
    return preg_replace('~(?:<br\s*/?>\s*)+~i', "\n", $txt);
}
/**
 * Build highlighted context snippets for every occurrence of a search phrase in an HTML document.
 *
 * The HTML is reduced to plain text in which every tag boundary becomes a line
 * break. The query is expanded into a regex that tolerates Arabic spelling
 * variants, diacritics, tatweel, and punctuation between the query words. Each
 * hit is wrapped in an <a id=hit_N> link and surrounded by the context that
 * back_text() and next_text() build from the neighbouring text.
 *
 * @param string      $str      HTML (or plain text) to search.
 * @param string      $words    Whitespace-separated search terms; treated as literal text, not as a regex.
 * @param int         $maxWords Maximum number of context words on each side of a hit.
 * @param string|null $data     URL prefix placed in front of "&hit=N" in every link.
 * @return string One <div dir =rtl> element per hit, concatenated; '' when nothing matches.
 */
function hit_keys($str,$words,$maxWords,$data){
    // Hit ids keep counting across calls, so links stay unique when several
    // documents are rendered into one page. The output itself is per call.
    static $count = 1;
    $output = '';

    // Optional tatweel / underscore / zero-width non-joiner after a letter,
    // each optionally followed by diacritics.
    $tail = '(?:[_\x{0640}\x{200C}]\p{Mn}+|[_\x{0640}\x{200C}])*';
    // Letters that match several spellings; the remaining letters match only themselves.
    $variants = ['ا' => 'اإآأٱ', 'ه' => 'هة', 'و' => 'ؤو', 'ؤ' => 'ؤو', 'ى' => 'ىئ'];
    $plainLetters = ['ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض',
        'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ي', 'ء'];
    foreach ($plainLetters as $letter) {
        $variants[$letter] = $letter;
    }
    $corr = [];
    foreach ($variants as $letter => $class) {
        $corr[$letter] = '(?:['.$class.']\p{Mn}+|['.$class.'])'.$tail;
    }
    // A Western digit also matches its Arabic-Indic (U+066x) and Persian (U+06Fx) forms.
    for ($digit = 0; $digit <= 9; $digit++) {
        $corr[$digit] = '['.$digit.'\x{066'.$digit.'}\x{06F'.$digit.'}]';
    }
    // NOTE(review): the earlier table also let "0" match "?" and the Arabic
    // question mark; kept for compatibility, but it looks like an encoding
    // artefact. Confirm and remove if so.
    $corr[0] = '(?:'.$corr[0].'|\x{061F}|\?)';

    $terms = preg_split('/\s+/u', trim((string) $words), -1, PREG_SPLIT_NO_EMPTY);
    if (!$terms) {
        // Empty query or invalid UTF-8: there is nothing to look for.
        return '';
    }
    $termPatterns = [];
    foreach ($terms as $term) {
        $pattern = '';
        foreach (preg_split('//u', $term, -1, PREG_SPLIT_NO_EMPTY) as $char) {
            // Characters outside the table are escaped so user input cannot inject regex syntax.
            $pattern .= isset($corr[$char]) ? $corr[$char] : preg_quote($char, '/');
        }
        $termPatterns[] = $pattern;
    }
    // Between two query words allow tags, whitespace, punctuation, symbols,
    // diacritics, tatweel and the right-to-left mark.
    $gap = '(?:<[^>]+>|[\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*';
    $cool = implode($gap, $termPatterns);

    // Drop elements whose content is never visible text.
    $str = (string) $str;
    $withoutHidden = preg_replace('~<(script|style|title)\b[^>]*>.*?</\1\s*>~is', '', $str);
    if ($withoutHidden !== null) {
        $str = $withoutHidden;
    }
    // Put a <br> in front of every tag so that, once the tags are stripped and
    // clean() has collapsed the breaks, each tag boundary is one line break.
    $str = preg_replace('/<[^>]*>/', '<br>$0', $str);
    $str = clean(strip_tags($str, '<br>'));

    // Split into alternating "plain text" / "word(s) containing a hit" chunks.
    $result = preg_split('/\s*(\S*'.$cool.'\S*)\s*/u', $str, -1, PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE);
    if ($result === false) {
        return '';
    }

    $all_words = []; // every non-empty chunk, keyed 1..n in document order
    $hits = [];      // the chunks that contain a hit, under the same keys
    $position = 1;
    foreach ($result as $chunk) {
        if (trim($chunk) === '') {
            continue;
        }
        if (preg_match('/'.$cool.'/u', $chunk)) {
            $hits[$position] = $chunk;
        }
        $all_words[$position] = trim(preg_replace('/\s+/u', ' ', $chunk));
        $position++;
    }

    foreach ($hits as $index => $hit) {
        $hitword = '';
        $isPhrase = strpos(trim($hit), ' ') !== false;
        // A hit may span several lines; each non-blank line gets its own link and id.
        foreach (preg_split('/\r\n|\r|\n/', $hit) as $line) {
            if (trim(preg_replace('/\s/u', '', $line)) === '') {
                continue;
            }
            $open = "<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">";
            if ($isPhrase) {
                $hitword .= $open.$line."</a> ";
            } else {
                // A callback keeps "$" or "\" inside $data from being read as replacement syntax.
                $hitword .= preg_replace_callback(
                    '/'.$cool.'/u',
                    function ($m) use ($open) {
                        return $open.$m[0]."</a> ";
                    },
                    $line
                );
            }
            $count++;
        }

        // Neighbouring chunks seed the context; reset per hit so nothing leaks
        // over from the previous iteration.
        $back = isset($all_words[$index - 1]) ? $all_words[$index - 1] : '';
        $next = isset($all_words[$index + 1]) ? $all_words[$index + 1] : '';
        $back_text = back_text($back, $all_words, $index - 2, $maxWords);
        $next_text = next_text($next, $all_words, $index + 2, $maxWords);
        $output .= "<div dir =rtl>".$back_text." ".$hitword." ".$next_text."</div>";
    }
    return $output;
}
// Demo driver: search a short sample text for "test" and print the snippet.
$data = null;
$words = "test"; // was misspelled "$wrords", which left $words undefined in the call below
$str = "hi this test of the snippet code";
$maxWords = 7;
echo htmlspecialchars_decode(hit_keys($str,$words,$maxWords,$data));
It runs fast in PHP, but when I wrote the same functionality in Java for Android and tested it, it was very slow:
public String getsnipit(String myString,String rhhlterm,String value){
String output = "";
String body;
try {
InputStream is = getAssets().open(myString);
/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/
body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));
//body = removeTag(body);
// body = striphtmlbutbr(body);
body = clean(body);
String words = rhhlterm;
words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");
String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);
String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";
int tot_countS = 0;
int countK = 0;
Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;
List<Cube> myCubes = new ArrayList<>();
for (int i = 0; i < list2.length; i++) {
if(isNullOrEmpty(list2[i].trim())==false){
Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {
// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;
if(countK >=5) {
break;
}
}
//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;
}
}
// Log.d("ggg",gfg1);
Integer count = 1;
Integer count2 = 0;
String back= null;
String next= null;
String next_text =null;
String back_text =null;
Integer maxWords = 7;
String hit = "";
for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");
for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
if(gg[ii].replaceAll("\s","")!= ""){
if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));
}
count = count+1;;
}
}
// Log.d(" dd", dd);
hit = dd;
if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}
if(all_words.get(myCubes.get(i).index+1) != null){
next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}
// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}
} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}
output = output.replaceAll("\s+"," ");
return output;
}
I also tried C++, and it was very slow too. So my question is: will moving this logic into native code and calling it from Java on Android improve the performance, or must I write the whole project in a low-level language like C?
java php
migration rejected from superuser.com yesterday
This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.
put on hold as unclear what you're asking by 200_success, Jamal♦ yesterday
Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.
OK, but why is it so slow? I installed a PHP server on Android and ran the PHP code above there; the result came back very fast, but that was not the case with C++ and Java.
– jsem
yesterday
I don't have the complete code. I just tested a simple regex replace on a very long HTML string: it took about 10 to 15 seconds to get the result, just like the Java code above, whereas PHP needed only 2 to 3 seconds.
– jsem
yesterday
When I use regex and replace functions in C++ and Java on Android, the performance is slow, but that is not the case with PHP. I think the people who wrote PHP implemented these functions in C to make them faster.
– jsem
yesterday
Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday
4
This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday
comments disabled on deleted / locked posts / reviews |
show 1 more comment
I created text snippet code in PHP to extract search key words to work with my search engine. Here is the code:
/**
 * Build the context text that follows a hit.
 *
 * Starting from $AppendText, keeps appending the chunks $all_words[$row_next],
 * $all_words[$row_next + 1], ... until the text holds at least $maxWords words
 * (as counted by mb_str_word_count) or the chunks run out. The result is then
 * cut down to its first $maxWords whitespace-separated words and ".." is appended.
 *
 * @param string $AppendText Text immediately after the hit.
 * @param array  $all_words  Text chunks keyed by consecutive integer position.
 * @param int    $row_next   Key of the next chunk to append.
 * @param int    $maxWords   Maximum number of words to keep.
 * @return string The truncated text followed by "..", or just ".." when there is no text.
 */
function next_text($AppendText,$all_words,$row_next,$maxWords){
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;
    if ($maxWords < 1) {
        // "{1,0}" is not a valid quantifier and no word could be kept anyway.
        return '..';
    }
    // Loop instead of recursing: same result, no call-stack growth on long documents.
    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' '.$all_words[$row_next];
        $row_next++;
    }
    // \S+\s* is one word plus its trailing whitespace; "u" keeps multi-byte text intact.
    if (preg_match('/^(?:\S+\s*){1,'.$maxWords.'}/u', $AppendText, $match) === 1) {
        return $match[0].'..';
    }
    // Empty or whitespace-only text: only the ellipsis remains.
    return '..';
}
/**
 * Build the context text that precedes a hit.
 *
 * Starting from $AppendText, keeps prepending the chunks $all_words[$row_back],
 * $all_words[$row_back - 1], ... until the text holds at least $maxWords words
 * (as counted by mb_str_word_count) or the chunks run out. The result is then
 * cut down to its last $maxWords whitespace-separated words and prefixed with "..".
 *
 * @param string $AppendText Text immediately before the hit.
 * @param array  $all_words  Text chunks keyed by consecutive integer position.
 * @param int    $row_back   Key of the next chunk to prepend.
 * @param int    $maxWords   Maximum number of words to keep.
 * @return string ".." followed by the truncated text, or just ".." when there is no text.
 */
function back_text($AppendText,$all_words,$row_back,$maxWords){
    $AppendText = (string) $AppendText;
    $maxWords = (int) $maxWords;
    if ($maxWords < 1) {
        // "{1,0}" is not a valid quantifier and no word could be kept anyway.
        return '..';
    }
    // Loop instead of recursing: same result, no call-stack growth on long documents.
    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back].' '.$AppendText;
        $row_back--;
    }
    // The atomic group stops a match from starting in the middle of a word, so
    // the match is always the last (up to) $maxWords whole words.
    if (preg_match('/(?>\S+\s*){1,'.$maxWords.'}$/u', $AppendText, $match) === 1) {
        return '..'.$match[0];
    }
    // Empty or whitespace-only text: only the ellipsis remains.
    return '..';
}
if (!function_exists('mb_str_word_count'))
{
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a run of letters, combining marks, digits and apostrophes;
     * everything else separates words. Combining marks are kept inside words so
     * that vocalised Arabic text is not split at every diacritic.
     *
     * @param string $string   UTF-8 text to examine.
     * @param int    $format   0 returns the number of words; any other value returns the list of words.
     * @param string $charlist Accepted for signature compatibility with str_word_count(); not used.
     * @return int|array Word count for format 0, otherwise the words in order of appearance.
     */
    function mb_str_word_count($string, $format = 0, $charlist = '') {
        $string = trim((string) $string);
        // Compare against '' explicitly: empty("0") is true, but "0" is one word.
        if ($string === '') {
            $words = array();
        } else {
            // NO_EMPTY drops the empty piece that leading/trailing punctuation would leave behind.
            $words = preg_split('~[^\p{L}\p{M}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Invalid UTF-8 or another PCRE failure: treat the text as having no words.
                $words = array();
            }
        }
        // Loose comparison mirrors the switch statement this replaces.
        if ($format == 0) {
            return count($words);
        }
        return $words;
    }
}
/**
 * Turn HTML line breaks into newlines.
 *
 * Every run of one or more <br>, <br/> or <br /> tags (any letter case),
 * together with the whitespace that follows each tag, becomes a single "\n".
 *
 * @param string $txt Text that may contain <br> tags.
 * @return string The text with each run of line-break tags replaced by one newline.
 */
function clean($txt)
{
    // One pass covers both the "two or more" and the "single tag" case, because
    // both are replaced by the same newline.
    return preg_replace('~(?:<br\s*/?>\s*)+~i', "\n", $txt);
}
function hit_keys($str,$words,$maxWords,$data){
$row_next="";
$row_back="";
$back= null;
$next= null;
$index =null;
$next_text =null;
$back_text =null;
$keywords = array();
$all_words = array();
$countK = 1;
$countS = 1;
$tot_countS = 1;
static $count = 1;
static $output = "";
static $dd = "";
static $hitword = "";
end($all_words);
$lastword_index = key($all_words);
$currKey_index ="";
$corr = [
'ا' => '(?:[اإآأٱ]\p{Mn}+|[اإآأٱ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ب' => '(?:[ب]\p{Mn}+|[ب])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ت' => '(?:[ت]\p{Mn}+|[ت])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ث' => '(?:[ث]\p{Mn}+|[ث])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ج' => '(?:[ج]\p{Mn}+|[ج])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ح' => '(?:[ح]\p{Mn}+|[ح])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'خ' => '(?:[خ]\p{Mn}+|[خ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'د' => '(?:[د]\p{Mn}+|[د])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ذ' => '(?:[ذ]\p{Mn}+|[ذ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ر' => '(?:[ر]\p{Mn}+|[ر])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ز' => '(?:[ز]\p{Mn}+|[ز])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'س' => '(?:[س]\p{Mn}+|[س])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ش' => '(?:[ش]\p{Mn}+|[ش])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ص' => '(?:[ص]\p{Mn}+|[ص])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ض' => '(?:[ض]\p{Mn}+|[ض])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ط' => '(?:[ط]\p{Mn}+|[ط])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ظ' => '(?:[ظ]\p{Mn}+|[ظ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ع' => '(?:[ع]\p{Mn}+|[ع])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'غ' => '(?:[غ]\p{Mn}+|[غ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ف' => '(?:[ف]\p{Mn}+|[ف])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ق' => '(?:[ق]\p{Mn}+|[ق])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ك' => '(?:[ك]\p{Mn}+|[ك])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ل' => '(?:[ل]\p{Mn}+|[ل])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'م' => '(?:[م]\p{Mn}+|[م])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ن' => '(?:[ن]\p{Mn}+|[ن])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ه' => '(?:[ه|ة]\p{Mn}+|[ه|ة])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"و" =>'(?:[ؤو]\p{Mn}+|[ؤو])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ؤ" =>'(?:[ؤو]\p{Mn}+|[ؤو])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ى" =>'(?:[ىئ]\p{Mn}+|[ىئ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ي" =>'(?:[ي]\p{Mn}+|[ي])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ء" =>'(?:[ء]\p{Mn}+|[ء])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
//٠
,"0"=>'(?:0|\x{0660}|\؟|\?)'
,"1"=>'(?:1|۱|١)'
,"2"=>'(?:2|۲|٢)'
,"3"=>'(?:3|۳|٣)'
,"4"=>'(?:4|٤|٤)'
,"5"=>'(?:5|٥|٥)'
,"6"=>'(?:6|٦|٦)'
,"7"=>'(?:7|٧|٧)'
,"8"=>'(?:8|۸|٨)'
,"9"=>'(?:9|۹|٩)'
];
$str = preg_replace('/(<script[^>]*>.+?</script>|<style[^>]*>.+?</style>|<title[^>]*>.+?</title>)/s', '', $str);
$str = preg_replace('/((.*?)</(.*?)>)(.*?)(<(.*?)[^>]*>(.*?))/is', "$1<br>$4$5", $str);
$str = preg_replace('/(.*?)(<(.*?)[^>]*>(.*?))/is', "$1<br>$2", $str);
$str= strip_tags($str,'<br>');
$str = clean($str);
$cool = implode(preg_split('/\s/', strtr($words, $corr)), '(?:<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*');
$result = preg_split("/s*([^s]*".$cool."[^s]*)s*/u", $str, -1, PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE);
for ($i = 0; $i < count($result); $i++) {
if (trim($result[$i]) != '' || trim($result[$i]) != null)
{
if(preg_match("/(".$cool.")/u", $result[$i])){
$keywords[$countK]= array_fill_keys([$tot_countS],$result[$i]);
$countK = $countK+1;
}
$text = trim(preg_replace('/s+/u', ' ', $result[$i]));
$all_words[$tot_countS] = $text;
$countS = $countS+1;
$tot_countS = $tot_countS+1;
}
}
foreach($keywords as $index => $item) {
$dd ="" ;
$gg = preg_split('/rn|r|n/', $item[key($item)]);
for ($i = 0; $i < count($gg); $i++) {
$ff = trim(preg_replace('/s/u', '', $gg[$i]));
if($ff!= ''){
if(strpos(trim($item[key($item)]), ' ') !== false)
{
$dd .= "<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">".$gg[$i]."</a> ";
}
else
{
$dd .= preg_replace('/('.$cool.')/us',"<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">$1</a> ",$gg[$i]);
}
$count++;
}
}
$hitword =$dd;
$currKey_index = key($item);
if(isset($all_words[key($item)-1])){
$back= $all_words[key($item)-1];
}
if(isset($all_words[key($item)+1])){
$next= $all_words[key($item)+1];
}
else{
$next ="";
}
$back_text = back_text($back,$all_words,(key($item)-1)-1,$maxWords);
$next_text = next_text($next,$all_words,(key($item)+1)+1,$maxWords);
$output .= "<div dir =rtl>".$back_text." ".$hitword ." ".$next_text."</div>";
}
return $output;
}
// Demo invocation: highlight the search term inside a sample sentence.
$data = null;
// Was misspelled "$wrords", which left $words undefined at the call below.
$words = "test";
$str = "hi this test of the snippet code";
$maxWords = 7;
echo htmlspecialchars_decode(hit_keys($str,$words,$maxWords,$data));
It runs fast, but when I wrote the same functionality in Java for Android and tested it, it was very slow:
/**
 * Builds an HTML snippet list for the hits of a search term inside an asset file.
 *
 * The asset named by myString is opened and parsed with Jsoup, reduced to text
 * plus br tags, and split around matches of a regex derived from rhhlterm. For
 * each recorded hit an anchor is generated and wrapped, together with the
 * context returned by back_text / next_text, in a div.
 *
 * NOTE(review): as it appears here the code seems to have lost escaping
 * (e.g. "\s" in string literals, unescaped quotes inside String.format) and
 * array brackets (list, list2 and gg are indexed and use .length but are
 * declared as String) -- confirm against the real source before compiling.
 *
 * @param myString asset path passed to getAssets().open
 * @param rhhlterm search term; its Arabic letters are expanded into regex fragments
 * @param value    prefix inserted into each generated href before "&hit="
 * @return the concatenated snippet markup with whitespace runs collapsed,
 *         or null when an IOException is caught
 */
public String getsnipit(String myString,String rhhlterm,String value){
String output = "";
String body;
try {
InputStream is = getAssets().open(myString);
/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/
// Parse the asset as UTF-8 HTML and keep only the body markup.
body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
// Strip everything except simple text formatting tags and br.
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));
//body = removeTag(body);
// body = striphtmlbutbr(body);
body = clean(body);
String words = rhhlterm;
// Expand each Arabic letter of the term into a regex fragment that also accepts
// the listed letter variants, trailing \p{Mn} marks, and underscore / tatweel / U+200C fillers.
words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");
// Split the expanded term and re-join its parts with a "tags / punctuation / marks" gap pattern.
String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);
// Split the body around whitespace-delimited chunks containing a match of gfg1.
String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";
int tot_countS = 0;
int countK = 0;
// all_words maps segment position -> plain text of that segment.
Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;
// myCubes collects (position, raw segment) for each segment that contains a hit.
List<Cube> myCubes = new ArrayList<>();
for (int i = 0; i < list2.length; i++) {
if(isNullOrEmpty(list2[i].trim())==false){
// NOTE(review): the same pattern is recompiled on every iteration; it does not
// change inside the loop, so compiling it once before the loop looks safe -- confirm.
Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {
// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;
// Stop scanning after the fifth hit; the hit segment itself and all later
// segments are therefore never stored in all_words.
if(countK >=5) {
break;
}
}
//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;
}
}
// Log.d("ggg",gfg1);
Integer count = 1;
Integer count2 = 0;
String back= null;
String next= null;
String next_text =null;
String back_text =null;
Integer maxWords = 7;
String hit = "";
for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");
for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
// NOTE(review): != on Strings compares references, not contents, so this
// check does not reliably detect an empty string; isEmpty() is probably intended.
if(gg[ii].replaceAll("\s","")!= ""){
if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));
}
count = count+1;;
}
}
// Log.d(" dd", dd);
hit = dd;
// NOTE(review): back_text / next_text are only assigned when a neighbour exists;
// otherwise they keep the previous iteration's value, or null on the first pass,
// which string concatenation renders as the text "null".
if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}
if(all_words.get(myCubes.get(i).index+1) != null){
next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}
// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}
} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}
// Collapse whitespace runs in the final markup.
output = output.replaceAll("\s+"," ");
return output;
}
I also tried C++, and it was very slow too. So my question is: will writing this in native code and calling it from Java on Android improve the performance, or must I write the whole project in a low-level language like C?
java php
I created text snippet code in PHP to extract search key words to work with my search engine. Here is the code:
/**
 * Build the text that follows a hit, for display in a search snippet.
 *
 * Starting from $AppendText, the entries of $all_words from index $row_next
 * onward are appended (space separated) until the text holds at least
 * $maxWords words or the next index is missing. The result is then cut to at
 * most $maxWords whitespace-delimited words and suffixed with "..".
 *
 * @param string|null $AppendText Text immediately after the hit; null is treated as "".
 * @param array       $all_words  Text segments indexed by integer position.
 * @param int         $row_next   Index of the next segment to append.
 * @param int         $maxWords   Maximum number of words to keep.
 * @return string The truncated text followed by "..", or just ".." when nothing matches.
 */
function next_text($AppendText,$all_words,$row_next,$maxWords){
    $AppendText = (string) $AppendText;

    // Iterative form of the original tail recursion: same stop conditions,
    // but no call-stack growth on long documents.
    while (isset($all_words[$row_next]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText .= ' '.$all_words[$row_next];
        $row_next++;
    }

    $limit = (int) $maxWords;
    // "\S+\s*" is one word plus its trailing whitespace (the backslashes were
    // missing, which made the pattern match literal "S"/"s"); /u keeps
    // multi-byte text such as Arabic intact.
    if ($limit < 1 || preg_match('/^(?:\S+\s*){1,'.$limit.'}/u', $AppendText, $match) !== 1) {
        // Nothing usable: return the bare ellipsis instead of reading an undefined $match[0].
        return '..';
    }

    return $match[0].'..';
}
/**
 * Build the text that precedes a hit, for display in a search snippet.
 *
 * Starting from $AppendText, the entries of $all_words from index $row_back
 * downward are prepended (space separated) until the text holds at least
 * $maxWords words or the next index is missing. The result is then cut to its
 * last $maxWords whitespace-delimited words and prefixed with "..".
 *
 * @param string|null $AppendText Text immediately before the hit; null is treated as "".
 * @param array       $all_words  Text segments indexed by integer position.
 * @param int         $row_back   Index of the next segment to prepend.
 * @param int         $maxWords   Maximum number of words to keep.
 * @return string ".." followed by the truncated text, or just ".." when nothing matches.
 */
function back_text($AppendText,$all_words,$row_back,$maxWords){
    $AppendText = (string) $AppendText;

    // Iterative form of the original tail recursion: same stop conditions,
    // but no call-stack growth on long documents.
    while (isset($all_words[$row_back]) && mb_str_word_count($AppendText) < $maxWords) {
        $AppendText = $all_words[$row_back].' '.$AppendText;
        $row_back--;
    }

    $limit = (int) $maxWords;
    // "\S+\s*" is one word plus its trailing whitespace (the backslashes were
    // missing, which made the pattern match literal "S"/"s"); anchoring at "$"
    // keeps the words closest to the hit. /u keeps multi-byte text intact.
    if ($limit < 1 || preg_match('/(?>\S+\s*){1,'.$limit.'}$/u', $AppendText, $match) !== 1) {
        // Nothing usable: return the bare ellipsis instead of reading an undefined $match[0].
        return '..';
    }

    return '..'.$match[0];
}
if (!function_exists('mb_str_word_count'))
{
    /**
     * Unicode-aware counterpart of str_word_count().
     *
     * A word is a maximal run of letters, combining marks, digits and
     * apostrophes; everything else is a separator. Combining marks are kept
     * inside words so that vocalised Arabic text is not split at every
     * diacritic.
     *
     * @param string $string   Text to analyse.
     * @param int    $format   0 returns the number of words; any other value
     *                         returns the list of words.
     * @param string $charlist Accepted for signature compatibility; not used.
     * @return int|array Word count, or the list of words.
     */
    function mb_str_word_count($string, $format = 0, $charlist = '') {
        $string = trim((string) $string);

        // Compare against '' explicitly: empty() would also discard the one-word string "0".
        if ($string === '') {
            $words = array();
        } else {
            // PREG_SPLIT_NO_EMPTY drops the empty tokens produced when the text
            // starts or ends with a separator, which used to inflate the count.
            $words = preg_split('~[^\p{L}\p{M}\p{N}\']+~u', $string, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                // Invalid UTF-8 or PCRE failure: treat as "no words" rather than counting false.
                $words = array();
            }
        }

        // Loose comparison retained from the original switch statement.
        if ($format == 0) {
            return count($words);
        }

        return $words;
    }
}
/**
 * Replace every run of one or more <br> tags (any case, optional
 * self-closing slash, optional whitespace between and after them) with a
 * single newline.
 *
 * The two original passes ("two or more" then "exactly one") both produced a
 * newline, so they are merged into one pass. The pattern's "\s*" and the
 * "\n" replacement had lost their backslashes, which turned the replacement
 * into the letter "n".
 *
 * @param string $txt HTML fragment in which only <br> tags are expected to remain.
 * @return string|null The text with line breaks normalised; null if PCRE fails.
 */
function clean($txt)
{
    return preg_replace('{(?:<br\s*/?>\s*)+}i', "\n", $txt);
}
/**
 * Build HTML snippets showing each occurrence of a search term with surrounding context.
 *
 * The input markup is reduced to text plus <br> tags, split around matches of a
 * regex derived from $words, and every match is wrapped in an anchor and
 * surrounded by the text returned by back_text() / next_text().
 *
 * NOTE(review): several patterns below look as if they lost their backslashes
 * (e.g. "s*" where "\s*" is expected, '/rn|r|n/' where '/\r\n|\r|\n/' is
 * expected, and an unescaped "/" inside "/"-delimited patterns) -- confirm
 * against the real source.
 *
 * @param string $str      HTML/text to search.
 * @param string $words    Search term; its characters are expanded through $corr.
 * @param int    $maxWords Word budget handed to back_text() / next_text().
 * @param mixed  $data     Value concatenated into each anchor's href before "&hit=".
 * @return string Accumulated snippet markup (see the note on the static variables).
 */
function hit_keys($str,$words,$maxWords,$data){
$row_next="";
$row_back="";
$back= null;
$next= null;
$index =null;
$next_text =null;
$back_text =null;
$keywords = array();
$all_words = array();
$countK = 1;
$countS = 1;
$tot_countS = 1;
// Static locals persist between calls: $count keeps numbering hits across calls,
// and $output keeps growing, so each call returns the previous calls' markup too.
static $count = 1;
static $output = "";
static $dd = "";
static $hitword = "";
// $all_words is still empty here and $lastword_index is not read again in this function.
end($all_words);
$lastword_index = key($all_words);
$currKey_index ="";
// Maps each search character to a regex fragment that also accepts the listed
// letter variants, trailing \p{Mn} marks, and underscore / tatweel / U+200C fillers.
$corr = [
'ا' => '(?:[اإآأٱ]\p{Mn}+|[اإآأٱ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ب' => '(?:[ب]\p{Mn}+|[ب])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ت' => '(?:[ت]\p{Mn}+|[ت])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ث' => '(?:[ث]\p{Mn}+|[ث])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ج' => '(?:[ج]\p{Mn}+|[ج])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ح' => '(?:[ح]\p{Mn}+|[ح])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'خ' => '(?:[خ]\p{Mn}+|[خ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'د' => '(?:[د]\p{Mn}+|[د])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ذ' => '(?:[ذ]\p{Mn}+|[ذ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ر' => '(?:[ر]\p{Mn}+|[ر])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ز' => '(?:[ز]\p{Mn}+|[ز])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'س' => '(?:[س]\p{Mn}+|[س])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ش' => '(?:[ش]\p{Mn}+|[ش])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ص' => '(?:[ص]\p{Mn}+|[ص])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ض' => '(?:[ض]\p{Mn}+|[ض])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ط' => '(?:[ط]\p{Mn}+|[ط])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ظ' => '(?:[ظ]\p{Mn}+|[ظ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ع' => '(?:[ع]\p{Mn}+|[ع])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'غ' => '(?:[غ]\p{Mn}+|[غ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ف' => '(?:[ف]\p{Mn}+|[ف])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ق' => '(?:[ق]\p{Mn}+|[ق])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ك' => '(?:[ك]\p{Mn}+|[ك])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ل' => '(?:[ل]\p{Mn}+|[ل])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'م' => '(?:[م]\p{Mn}+|[م])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ن' => '(?:[ن]\p{Mn}+|[ن])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,'ه' => '(?:[ه|ة]\p{Mn}+|[ه|ة])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"و" =>'(?:[ؤو]\p{Mn}+|[ؤو])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ؤ" =>'(?:[ؤو]\p{Mn}+|[ؤو])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ى" =>'(?:[ىئ]\p{Mn}+|[ىئ])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ي" =>'(?:[ي]\p{Mn}+|[ي])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
,"ء" =>'(?:[ء]\p{Mn}+|[ء])(?:[_ـ\x{200C}]\p{Mn}+|[_ـ\x{200C}])*'
// Digits: each ASCII digit also accepts Arabic digit forms (e.g. ٠, U+0660).
// NOTE(review): "0" additionally accepts question marks, and 4-7 list the same
// Arabic-Indic digit twice -- presumably the second was meant to be the
// Extended Arabic-Indic form; confirm.
,"0"=>'(?:0|\x{0660}|\؟|\?)'
,"1"=>'(?:1|۱|١)'
,"2"=>'(?:2|۲|٢)'
,"3"=>'(?:3|۳|٣)'
,"4"=>'(?:4|٤|٤)'
,"5"=>'(?:5|٥|٥)'
,"6"=>'(?:6|٦|٦)'
,"7"=>'(?:7|٧|٧)'
,"8"=>'(?:8|۸|٨)'
,"9"=>'(?:9|۹|٩)'
];
// Drop script/style/title elements, insert <br> around remaining tags, then
// strip every tag except <br> and normalise the breaks via clean().
$str = preg_replace('/(<script[^>]*>.+?</script>|<style[^>]*>.+?</style>|<title[^>]*>.+?</title>)/s', '', $str);
$str = preg_replace('/((.*?)</(.*?)>)(.*?)(<(.*?)[^>]*>(.*?))/is', "$1<br>$4$5", $str);
$str = preg_replace('/(.*?)(<(.*?)[^>]*>(.*?))/is', "$1<br>$2", $str);
$str= strip_tags($str,'<br>');
$str = clean($str);
// Expand the term through $corr, split it on whitespace and join the parts with
// a "tags / punctuation / marks" gap pattern.
// NOTE(review): implode() is called as (array, separator); PHP 8 only accepts
// (separator, array).
$cool = implode(preg_split('/\s/', strtr($words, $corr)), '(?:<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\x{0640}\x{200F}])*');
// Split the text around chunks containing a match, keeping those chunks in the result.
$result = preg_split("/s*([^s]*".$cool."[^s]*)s*/u", $str, -1, PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE);
for ($i = 0; $i < count($result); $i++) {
// The second comparison is redundant with the first under loose comparison.
if (trim($result[$i]) != '' || trim($result[$i]) != null)
{
if(preg_match("/(".$cool.")/u", $result[$i])){
// Each keyword entry is a one-element array: segment position => raw segment.
$keywords[$countK]= array_fill_keys([$tot_countS],$result[$i]);
$countK = $countK+1;
}
$text = trim(preg_replace('/s+/u', ' ', $result[$i]));
$all_words[$tot_countS] = $text;
$countS = $countS+1;
$tot_countS = $tot_countS+1;
}
}
foreach($keywords as $index => $item) {
$dd ="" ;
// key($item) is the segment position stored above.
$gg = preg_split('/rn|r|n/', $item[key($item)]);
for ($i = 0; $i < count($gg); $i++) {
$ff = trim(preg_replace('/s/u', '', $gg[$i]));
if($ff!= ''){
// Segments containing a space are linked as a whole; otherwise only the matched part is linked.
if(strpos(trim($item[key($item)]), ' ') !== false)
{
$dd .= "<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">".$gg[$i]."</a> ";
}
else
{
$dd .= preg_replace('/('.$cool.')/us',"<a onclick = 'go_hit(this);' id =hit_".$count." href = ".$data."&hit=".$count.">$1</a> ",$gg[$i]);
}
$count++;
}
}
$hitword =$dd;
$currKey_index = key($item);
// NOTE(review): $back is not reset when there is no previous segment, so it keeps
// the value from the previous hit (or null on the first one).
if(isset($all_words[key($item)-1])){
$back= $all_words[key($item)-1];
}
if(isset($all_words[key($item)+1])){
$next= $all_words[key($item)+1];
}
else{
$next ="";
}
// Context starts at the immediate neighbours and extends outward from the next index on each side.
$back_text = back_text($back,$all_words,(key($item)-1)-1,$maxWords);
$next_text = next_text($next,$all_words,(key($item)+1)+1,$maxWords);
$output .= "<div dir =rtl>".$back_text." ".$hitword ." ".$next_text."</div>";
}
return $output;
}
// Demo invocation: highlight the search term inside a sample sentence.
$data = null;
// Was misspelled "$wrords", which left $words undefined at the call below.
$words = "test";
$str = "hi this test of the snippet code";
$maxWords = 7;
echo htmlspecialchars_decode(hit_keys($str,$words,$maxWords,$data));
It runs fast, but when I wrote the same functionality in Java for Android and tested it, it was very slow:
/**
 * Builds an HTML snippet list for the hits of a search term inside an asset file.
 *
 * The asset named by myString is opened and parsed with Jsoup, reduced to text
 * plus br tags, and split around matches of a regex derived from rhhlterm. For
 * each recorded hit an anchor is generated and wrapped, together with the
 * context returned by back_text / next_text, in a div.
 *
 * NOTE(review): as it appears here the code seems to have lost escaping
 * (e.g. "\s" in string literals, unescaped quotes inside String.format) and
 * array brackets (list, list2 and gg are indexed and use .length but are
 * declared as String) -- confirm against the real source before compiling.
 *
 * @param myString asset path passed to getAssets().open
 * @param rhhlterm search term; its Arabic letters are expanded into regex fragments
 * @param value    prefix inserted into each generated href before "&hit="
 * @return the concatenated snippet markup with whitespace runs collapsed,
 *         or null when an IOException is caught
 */
public String getsnipit(String myString,String rhhlterm,String value){
String output = "";
String body;
try {
InputStream is = getAssets().open(myString);
/*int size = is.available();
byte buffer = new byte[size];
is.read(buffer);
is.close();*/
// Parse the asset as UTF-8 HTML and keep only the body markup.
body = Jsoup.parse(is, "UTF-8", "jjj").body().html();
//.select("br").append("br")
body= fix_String(body);
// Strip everything except simple text formatting tags and br.
body = Jsoup.clean(body,
"",
Whitelist.simpleText().addTags("br"),
new Document.OutputSettings().prettyPrint(true));
//body = removeTag(body);
// body = striphtmlbutbr(body);
body = clean(body);
String words = rhhlterm;
// Expand each Arabic letter of the term into a regex fragment that also accepts
// the listed letter variants, trailing \p{Mn} marks, and underscore / tatweel / U+200C fillers.
words = words.replaceAll("ا","([اإآأٱ]\\p{Mn}+|[اإآأٱ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ب","([ب]\\p{Mn}+|[ب])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ت","([ت]\\p{Mn}+|[ت])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ث","([ث]\\p{Mn}+|[ث])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ج","([ج]\\p{Mn}+|[ج])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ح","([ح]\\p{Mn}+|[ح])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("خ","([خ]\\p{Mn}+|[خ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("د","([د]\\p{Mn}+|[د])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ذ","([ذ]\\p{Mn}+|[ذ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ر","([ر]\\p{Mn}+|[ر])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ز","([ز]\\p{Mn}+|[ز])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("س","([س]\\p{Mn}+|[س])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ش","([ش]\\p{Mn}+|[ش])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ص","([ص]\\p{Mn}+|[ص])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ض","([ض]\\p{Mn}+|[ض])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ط","([ط]\\p{Mn}+|[ط])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ظ","([ظ]\\p{Mn}+|[ظ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ع","([ع]\\p{Mn}+|[ع])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("غ","([غ]\\p{Mn}+|[غ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ف","([ف]\\p{Mn}+|[ف])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ق","([ق]\\p{Mn}+|[ق])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ك","([ك]\\p{Mn}+|[ك])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ل","([ل]\\p{Mn}+|[ل])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("م","([م]\\p{Mn}+|[م])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ن","([ن]\\p{Mn}+|[ن])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ه","([ه|ة]\\p{Mn}+|[ه|ة])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("و","([ؤو]\\p{Mn}+|[ؤو])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ى","([ىئ]\\p{Mn}+|[ىئ])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ي","([ي]\\p{Mn}+|[ي])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*")
.replaceAll("ء","([ء]\\p{Mn}+|[ء])([_ـ\\u200C]\\p{Mn}+|[_ـ\\u200C])*");
// Split the expanded term and re-join its parts with a "tags / punctuation / marks" gap pattern.
String list = words.split( "\s");
String gfg1 = TextUtils.join("(<[^>]+>|[<br>\n\r\s\p{P}\p{S}\p{Mn}\u0640\u200F])*", list);
//Log.d("gfg1",gfg1);
// Split the body around whitespace-delimited chunks containing a match of gfg1.
String list2 = splitWithDelimiters(body,"\s*([^\s]*"+gfg1+"[^\s]*)\s*");
// String list3 ="";
int tot_countS = 0;
int countK = 0;
// all_words maps segment position -> plain text of that segment.
Map<Integer ,String > all_words = new HashMap<Integer ,String >();
String text;
// myCubes collects (position, raw segment) for each segment that contains a hit.
List<Cube> myCubes = new ArrayList<>();
for (int i = 0; i < list2.length; i++) {
if(isNullOrEmpty(list2[i].trim())==false){
// NOTE(review): the same pattern is recompiled on every iteration; it does not
// change inside the loop, so compiling it once before the loop looks safe -- confirm.
Pattern p = Pattern.compile(gfg1);
Matcher m = p .matcher(list2[i]);
if (m.find()) {
// list3 = list3 + list2[i] +countK+ "<br>";
// keywords.put(tot_countS , list2[i]);
// oa[countK] = keywords;
myCubes.add(new Cube(tot_countS,list2[i]));
countK = countK+1;
// Stop scanning after the fifth hit; the hit segment itself and all later
// segments are therefore never stored in all_words.
if(countK >=5) {
break;
}
}
//text = list2[i].replaceAll("\s+"," ");
text = Jsoup.parse(list2[i]).text();
all_words.put(tot_countS , text.trim());
tot_countS = tot_countS+1;
}
}
// Log.d("ggg",gfg1);
Integer count = 1;
Integer count2 = 0;
String back= null;
String next= null;
String next_text =null;
String back_text =null;
Integer maxWords = 7;
String hit = "";
for (int i = 0; i < myCubes.size(); i++) {
String dd ="";
String gg = splitWithDelimiters(myCubes.get(i).key, "\r\n|\r|\n");
for (int ii = 0; ii < gg.length; ii++) {
// Log.d("data",gg[ii]);
// NOTE(review): != on Strings compares references, not contents, so this
// check does not reliably detect an empty string; isEmpty() is probably intended.
if(gg[ii].replaceAll("\s","")!= ""){
if(gg[ii].trim().contains(" ") != false) {
Log.d("data","g"+gg[ii]+"g");
dd += String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,gg[ii],value);
}
else{
Log.d("data",gg[ii]);
gg[ii] = Jsoup.parse(gg[ii]).text();
dd += gg[ii].replaceAll("("+gfg1+")",String.format("<a onclick = "go_hit(this);" id ="hit_%1$s" href ="%3$s&hit=%1$s">%2$s</a> ", count ,"$1",value));
}
count = count+1;;
}
}
// Log.d(" dd", dd);
hit = dd;
// NOTE(review): back_text / next_text are only assigned when a neighbour exists;
// otherwise they keep the previous iteration's value, or null on the first pass,
// which string concatenation renders as the text "null".
if(all_words.get(myCubes.get(i).index-1) != null){
back= all_words.get(myCubes.get(i).index -1);
back_text = back_text(back,all_words,(myCubes.get(i).index -1)-1,maxWords);
//back_text= Jsoup.parse( back_text).text();
}
if(all_words.get(myCubes.get(i).index+1) != null){
next= all_words.get(myCubes.get(i).index +1);
next_text = next_text(next,all_words,(myCubes.get(i).index +1)+1,maxWords);
//next_text= Jsoup.parse(next_text).text();
}
// break;
output += "<div>"+back_text +" "+hit+" "+next_text+"</div>";
count2++;
}
} catch (IOException e) {
Log.e("TAG", e.getLocalizedMessage(), e);
return null;
}
// Collapse whitespace runs in the final markup.
output = output.replaceAll("\s+"," ");
return output;
}
I also tried C++, and it was very slow too. So my question is: will writing this in native code and calling it from Java on Android improve the performance, or must I write the whole project in a low-level language like C?
java php
java php
edited yesterday
Reinderien
3,842821
3,842821
asked yesterday
jsem
1
1
migration rejected from superuser.com yesterday
This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.
put on hold as unclear what you're asking by 200_success, Jamal♦ yesterday
Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.
migration rejected from superuser.com yesterday
This question came from our site for computer enthusiasts and power users. Votes, comments, and answers are locked due to the question being closed here, but it may be eligible for editing and reopening on the site where it originated.
put on hold as unclear what you're asking by 200_success, Jamal♦ yesterday
Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.
OK, but why is it so slow? I installed a PHP server on Android and ran the PHP code above, and the result came back very quickly, but that was not the case with C++ and Java.
– jsem
yesterday
I don't have the complete code. I just tested a simple regex string replacement on a very long HTML text: it took about 10 to 15 seconds to get the result, just like the Java code above, whereas PHP needed only 2 to 3 seconds and showed the result very quickly.
– jsem
yesterday
When I use regex and replace functions in C++ and Java on Android, the performance is slow, but that is not the case with PHP. I think the people who wrote PHP implemented it in C to make it perform better.
– jsem
yesterday
Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday
4
This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday
comments disabled on deleted / locked posts / reviews |
show 1 more comment
ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
– jsem
yesterday
i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
– jsem
yesterday
when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
– jsem
yesterday
Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday
4
This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday
ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
– jsem
yesterday
ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
– jsem
yesterday
i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
– jsem
yesterday
i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
– jsem
yesterday
when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
– jsem
yesterday
when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
– jsem
yesterday
Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday
Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday
4
4
This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday
This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday
comments disabled on deleted / locked posts / reviews |
show 1 more comment
0
active
oldest
votes
0
active
oldest
votes
0
active
oldest
votes
active
oldest
votes
active
oldest
votes
ok but why is very slow ?? i tried PHP on android i installed PHP server on android and run the php code above its was very fast the result but that was not the case with c++ and java
– jsem
yesterday
i don't have complete code i just test it with simple regex replace String for very long HTML text it take more time about 10 to 15 second to get the result just like the java code above but the php was very fast only 2 to 3 second and the result showed very fast
– jsem
yesterday
when i use regex and replace function in c++ and java on android the performance is slow but that not the case with php i think the guys who coded the php used c code to make the performance more effect
– jsem
yesterday
Wait, you didn't write this code? How did you come by it?
– bruglesco
yesterday
4
This was migrated from SuperUser, which seems reasonable. Unfortunately, this is not in the normal form for Code Review. Please read How to Ask and edit accordingly. In particular, the title should summarize what the code does, not explain what you want from a review. And the body should include a longer description. What input does it take? What output does it produce? It would also help if you included enough code to run an example rather than just the processing code.
– mdfst13
yesterday